1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
28 #include "double-int.h"
35 #include "fold-const.h"
36 #include "stringpool.h"
39 #include "stor-layout.h"
43 #include "hard-reg-set.h"
44 #include "insn-config.h"
45 #include "conditions.h"
47 #include "insn-codes.h"
48 #include "insn-attr.h"
54 #include "statistics.h"
56 #include "fixed-value.h"
64 #include "diagnostic-core.h"
67 #include "dominance.h"
73 #include "cfgcleanup.h"
74 #include "basic-block.h"
77 #include "target-def.h"
78 #include "common/common-target.h"
79 #include "langhooks.h"
83 #include "plugin-api.h"
86 #include "hash-table.h"
87 #include "tree-ssa-alias.h"
88 #include "internal-fn.h"
89 #include "gimple-fold.h"
91 #include "gimple-expr.h"
97 #include "tm-constrs.h"
101 #include "sched-int.h"
105 #include "diagnostic.h"
106 #include "dumpfile.h"
107 #include "tree-pass.h"
109 #include "pass_manager.h"
110 #include "target-globals.h"
111 #include "tree-vectorizer.h"
112 #include "shrink-wrap.h"
113 #include "builtins.h"
114 #include "rtl-iter.h"
115 #include "tree-iterator.h"
116 #include "tree-chkp.h"
117 #include "rtl-chkp.h"
/* Forward declarations for the PE/COFF (Windows) symbol-legitimization
   helpers; being "static", their definitions live later in this file.  */
119 static rtx legitimize_dllimport_symbol (rtx, bool);
120 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
121 static rtx legitimize_pe_coff_symbol (rtx, bool);
/* Default stack-probe limit; -1 unless a target header overrides it.  */
123 #ifndef CHECK_STACK_LIMIT
124 #define CHECK_STACK_LIMIT (-1)
/* NOTE(review): the matching #endif and the final arm of MODE_INDEX
   (the fall-through index for "other" modes) are elided from this
   excerpt — confirm they are intact in the full file.  */
127 /* Return index of given mode in mult and division cost tables. */
128 #define MODE_INDEX(mode) \
129 ((mode) == QImode ? 0 \
130 : (mode) == HImode ? 1 \
131 : (mode) == SImode ? 2 \
132 : (mode) == DImode ? 3 \
135 /* Processor costs (relative to an add) */
136 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
137 #define COSTS_N_BYTES(N) ((N) * 2)
/* Placeholder stringop strategy that simply calls the library routine;
   used to fill a table slot a tuning does not provide.  */
139 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
/* memcpy/memset expansion strategies when optimizing for size: a plain
   "rep" byte move/store is the smallest possible encoding.  The two array
   entries are presumably selected by TARGET_64BIT ([0] = 32-bit,
   [1] = 64-bit) — confirm at the use sites.  */
141 static stringop_algs ix86_size_memcpy[2] = {
142 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
143 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
144 static stringop_algs ix86_size_memset[2] = {
145 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
146 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
/* Cost table for -Os tuning: each entry is an encoding length
   (COSTS_N_BYTES), not a latency.  NOTE(review): the embedded source
   numbering shows gaps (e.g. lines 168, 194, 201-202) and no closing
   "};" — lines were elided from this excerpt, do not edit the fields
   positionally without the full file.  */
149 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
150 COSTS_N_BYTES (2), /* cost of an add instruction */
151 COSTS_N_BYTES (3), /* cost of a lea instruction */
152 COSTS_N_BYTES (2), /* variable shift costs */
153 COSTS_N_BYTES (3), /* constant shift costs */
154 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
155 COSTS_N_BYTES (3), /* HI */
156 COSTS_N_BYTES (3), /* SI */
157 COSTS_N_BYTES (3), /* DI */
158 COSTS_N_BYTES (5)}, /* other */
159 0, /* cost of multiply per each bit set */
160 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
161 COSTS_N_BYTES (3), /* HI */
162 COSTS_N_BYTES (3), /* SI */
163 COSTS_N_BYTES (3), /* DI */
164 COSTS_N_BYTES (5)}, /* other */
165 COSTS_N_BYTES (3), /* cost of movsx */
166 COSTS_N_BYTES (3), /* cost of movzx */
167 0, /* "large" insn */
169 2, /* cost for loading QImode using movzbl */
170 {2, 2, 2}, /* cost of loading integer registers
171 in QImode, HImode and SImode.
172 Relative to reg-reg move (2). */
173 {2, 2, 2}, /* cost of storing integer registers */
174 2, /* cost of reg,reg fld/fst */
175 {2, 2, 2}, /* cost of loading fp registers
176 in SFmode, DFmode and XFmode */
177 {2, 2, 2}, /* cost of storing fp registers
178 in SFmode, DFmode and XFmode */
179 3, /* cost of moving MMX register */
180 {3, 3}, /* cost of loading MMX registers
181 in SImode and DImode */
182 {3, 3}, /* cost of storing MMX registers
183 in SImode and DImode */
184 3, /* cost of moving SSE register */
185 {3, 3, 3}, /* cost of loading SSE registers
186 in SImode, DImode and TImode */
187 {3, 3, 3}, /* cost of storing SSE registers
188 in SImode, DImode and TImode */
189 3, /* MMX or SSE register to integer */
190 0, /* size of l1 cache */
191 0, /* size of l2 cache */
192 0, /* size of prefetch block */
193 0, /* number of parallel prefetches */
195 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
196 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
197 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
198 COSTS_N_BYTES (2), /* cost of FABS instruction. */
199 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
200 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
203 1, /* scalar_stmt_cost. */
204 1, /* scalar load_cost. */
205 1, /* scalar_store_cost. */
206 1, /* vec_stmt_cost. */
207 1, /* vec_to_scalar_cost. */
208 1, /* scalar_to_vec_cost. */
209 1, /* vec_align_load_cost. */
210 1, /* vec_unalign_load_cost. */
211 1, /* vec_store_cost. */
212 1, /* cond_taken_branch_cost. */
213 1, /* cond_not_taken_branch_cost. */
216 /* Processor costs (relative to an add) */
/* i386 stringops: rep movsb/stosb for all sizes.  The second table slot
   is a dummy (presumably the 64-bit variant, meaningless for the 386).  */
217 static stringop_algs i386_memcpy[2] = {
218 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
219 DUMMY_STRINGOP_ALGS};
220 static stringop_algs i386_memset[2] = {
221 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
222 DUMMY_STRINGOP_ALGS};
225 struct processor_costs i386_cost = { /* 386 specific costs */
226 COSTS_N_INSNS (1), /* cost of an add instruction */
227 COSTS_N_INSNS (1), /* cost of a lea instruction */
228 COSTS_N_INSNS (3), /* variable shift costs */
229 COSTS_N_INSNS (2), /* constant shift costs */
230 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
231 COSTS_N_INSNS (6), /* HI */
232 COSTS_N_INSNS (6), /* SI */
233 COSTS_N_INSNS (6), /* DI */
234 COSTS_N_INSNS (6)}, /* other */
235 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
236 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
237 COSTS_N_INSNS (23), /* HI */
238 COSTS_N_INSNS (23), /* SI */
239 COSTS_N_INSNS (23), /* DI */
240 COSTS_N_INSNS (23)}, /* other */
241 COSTS_N_INSNS (3), /* cost of movsx */
242 COSTS_N_INSNS (2), /* cost of movzx */
243 15, /* "large" insn */
245 4, /* cost for loading QImode using movzbl */
246 {2, 4, 2}, /* cost of loading integer registers
247 in QImode, HImode and SImode.
248 Relative to reg-reg move (2). */
249 {2, 4, 2}, /* cost of storing integer registers */
250 2, /* cost of reg,reg fld/fst */
251 {8, 8, 8}, /* cost of loading fp registers
252 in SFmode, DFmode and XFmode */
253 {8, 8, 8}, /* cost of storing fp registers
254 in SFmode, DFmode and XFmode */
255 2, /* cost of moving MMX register */
256 {4, 8}, /* cost of loading MMX registers
257 in SImode and DImode */
258 {4, 8}, /* cost of storing MMX registers
259 in SImode and DImode */
260 2, /* cost of moving SSE register */
261 {4, 8, 16}, /* cost of loading SSE registers
262 in SImode, DImode and TImode */
263 {4, 8, 16}, /* cost of storing SSE registers
264 in SImode, DImode and TImode */
265 3, /* MMX or SSE register to integer */
266 0, /* size of l1 cache */
267 0, /* size of l2 cache */
268 0, /* size of prefetch block */
269 0, /* number of parallel prefetches */
271 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
272 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
273 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
274 COSTS_N_INSNS (22), /* cost of FABS instruction. */
275 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
276 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
279 1, /* scalar_stmt_cost. */
280 1, /* scalar load_cost. */
281 1, /* scalar_store_cost. */
282 1, /* vec_stmt_cost. */
283 1, /* vec_to_scalar_cost. */
284 1, /* scalar_to_vec_cost. */
285 1, /* vec_align_load_cost. */
286 2, /* vec_unalign_load_cost. */
287 1, /* vec_store_cost. */
288 3, /* cond_taken_branch_cost. */
289 1, /* cond_not_taken_branch_cost. */
/* i486 stringops: 4-byte rep moves/stores for all sizes.  */
292 static stringop_algs i486_memcpy[2] = {
293 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
294 DUMMY_STRINGOP_ALGS};
295 static stringop_algs i486_memset[2] = {
296 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
297 DUMMY_STRINGOP_ALGS};
300 struct processor_costs i486_cost = { /* 486 specific costs */
301 COSTS_N_INSNS (1), /* cost of an add instruction */
302 COSTS_N_INSNS (1), /* cost of a lea instruction */
303 COSTS_N_INSNS (3), /* variable shift costs */
304 COSTS_N_INSNS (2), /* constant shift costs */
305 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
306 COSTS_N_INSNS (12), /* HI */
307 COSTS_N_INSNS (12), /* SI */
308 COSTS_N_INSNS (12), /* DI */
309 COSTS_N_INSNS (12)}, /* other */
310 1, /* cost of multiply per each bit set */
311 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
312 COSTS_N_INSNS (40), /* HI */
313 COSTS_N_INSNS (40), /* SI */
314 COSTS_N_INSNS (40), /* DI */
315 COSTS_N_INSNS (40)}, /* other */
316 COSTS_N_INSNS (3), /* cost of movsx */
317 COSTS_N_INSNS (2), /* cost of movzx */
318 15, /* "large" insn */
320 4, /* cost for loading QImode using movzbl */
321 {2, 4, 2}, /* cost of loading integer registers
322 in QImode, HImode and SImode.
323 Relative to reg-reg move (2). */
324 {2, 4, 2}, /* cost of storing integer registers */
325 2, /* cost of reg,reg fld/fst */
326 {8, 8, 8}, /* cost of loading fp registers
327 in SFmode, DFmode and XFmode */
328 {8, 8, 8}, /* cost of storing fp registers
329 in SFmode, DFmode and XFmode */
330 2, /* cost of moving MMX register */
331 {4, 8}, /* cost of loading MMX registers
332 in SImode and DImode */
333 {4, 8}, /* cost of storing MMX registers
334 in SImode and DImode */
335 2, /* cost of moving SSE register */
336 {4, 8, 16}, /* cost of loading SSE registers
337 in SImode, DImode and TImode */
338 {4, 8, 16}, /* cost of storing SSE registers
339 in SImode, DImode and TImode */
340 3, /* MMX or SSE register to integer */
341 4, /* size of l1 cache. 486 has 8kB cache
342 shared for code and data, so 4kB is
343 not really precise. */
344 4, /* size of l2 cache */
345 0, /* size of prefetch block */
346 0, /* number of parallel prefetches */
348 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
349 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
350 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
351 COSTS_N_INSNS (3), /* cost of FABS instruction. */
352 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
353 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
356 1, /* scalar_stmt_cost. */
357 1, /* scalar load_cost. */
358 1, /* scalar_store_cost. */
359 1, /* vec_stmt_cost. */
360 1, /* vec_to_scalar_cost. */
361 1, /* scalar_to_vec_cost. */
362 1, /* vec_align_load_cost. */
363 2, /* vec_unalign_load_cost. */
364 1, /* vec_store_cost. */
365 3, /* cond_taken_branch_cost. */
366 1, /* cond_not_taken_branch_cost. */
/* Pentium stringops: rep movsl up to 256 bytes for memcpy then libcall;
   rep stosl for memset at every size.  */
369 static stringop_algs pentium_memcpy[2] = {
370 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
371 DUMMY_STRINGOP_ALGS};
372 static stringop_algs pentium_memset[2] = {
373 {libcall, {{-1, rep_prefix_4_byte, false}}},
374 DUMMY_STRINGOP_ALGS};
377 struct processor_costs pentium_cost = {
378 COSTS_N_INSNS (1), /* cost of an add instruction */
379 COSTS_N_INSNS (1), /* cost of a lea instruction */
380 COSTS_N_INSNS (4), /* variable shift costs */
381 COSTS_N_INSNS (1), /* constant shift costs */
382 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
383 COSTS_N_INSNS (11), /* HI */
384 COSTS_N_INSNS (11), /* SI */
385 COSTS_N_INSNS (11), /* DI */
386 COSTS_N_INSNS (11)}, /* other */
387 0, /* cost of multiply per each bit set */
388 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
389 COSTS_N_INSNS (25), /* HI */
390 COSTS_N_INSNS (25), /* SI */
391 COSTS_N_INSNS (25), /* DI */
392 COSTS_N_INSNS (25)}, /* other */
393 COSTS_N_INSNS (3), /* cost of movsx */
394 COSTS_N_INSNS (2), /* cost of movzx */
395 8, /* "large" insn */
397 6, /* cost for loading QImode using movzbl */
398 {2, 4, 2}, /* cost of loading integer registers
399 in QImode, HImode and SImode.
400 Relative to reg-reg move (2). */
401 {2, 4, 2}, /* cost of storing integer registers */
402 2, /* cost of reg,reg fld/fst */
403 {2, 2, 6}, /* cost of loading fp registers
404 in SFmode, DFmode and XFmode */
405 {4, 4, 6}, /* cost of storing fp registers
406 in SFmode, DFmode and XFmode */
407 8, /* cost of moving MMX register */
408 {8, 8}, /* cost of loading MMX registers
409 in SImode and DImode */
410 {8, 8}, /* cost of storing MMX registers
411 in SImode and DImode */
412 2, /* cost of moving SSE register */
413 {4, 8, 16}, /* cost of loading SSE registers
414 in SImode, DImode and TImode */
415 {4, 8, 16}, /* cost of storing SSE registers
416 in SImode, DImode and TImode */
417 3, /* MMX or SSE register to integer */
418 8, /* size of l1 cache. */
419 8, /* size of l2 cache */
420 0, /* size of prefetch block */
421 0, /* number of parallel prefetches */
423 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
424 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
425 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
426 COSTS_N_INSNS (1), /* cost of FABS instruction. */
427 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
428 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
431 1, /* scalar_stmt_cost. */
432 1, /* scalar load_cost. */
433 1, /* scalar_store_cost. */
434 1, /* vec_stmt_cost. */
435 1, /* vec_to_scalar_cost. */
436 1, /* scalar_to_vec_cost. */
437 1, /* vec_align_load_cost. */
438 2, /* vec_unalign_load_cost. */
439 1, /* vec_store_cost. */
440 3, /* cond_taken_branch_cost. */
441 1, /* cond_not_taken_branch_cost. */
444 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
445 (we ensure the alignment). For small blocks inline loop is still a
446 noticeable win, for bigger blocks either rep movsl or rep movsb is
447 way to go. Rep movsb has apparently more expensive startup time in CPU,
448 but after 4K the difference is down in the noise. */
449 static stringop_algs pentiumpro_memcpy[2] = {
450 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
451 {8192, rep_prefix_4_byte, false},
452 {-1, rep_prefix_1_byte, false}}},
453 DUMMY_STRINGOP_ALGS};
454 static stringop_algs pentiumpro_memset[2] = {
455 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
456 {8192, rep_prefix_4_byte, false},
457 {-1, libcall, false}}},
458 DUMMY_STRINGOP_ALGS};
460 struct processor_costs pentiumpro_cost = {
461 COSTS_N_INSNS (1), /* cost of an add instruction */
462 COSTS_N_INSNS (1), /* cost of a lea instruction */
463 COSTS_N_INSNS (1), /* variable shift costs */
464 COSTS_N_INSNS (1), /* constant shift costs */
465 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
466 COSTS_N_INSNS (4), /* HI */
467 COSTS_N_INSNS (4), /* SI */
468 COSTS_N_INSNS (4), /* DI */
469 COSTS_N_INSNS (4)}, /* other */
470 0, /* cost of multiply per each bit set */
471 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
472 COSTS_N_INSNS (17), /* HI */
473 COSTS_N_INSNS (17), /* SI */
474 COSTS_N_INSNS (17), /* DI */
475 COSTS_N_INSNS (17)}, /* other */
476 COSTS_N_INSNS (1), /* cost of movsx */
477 COSTS_N_INSNS (1), /* cost of movzx */
478 8, /* "large" insn */
480 2, /* cost for loading QImode using movzbl */
481 {4, 4, 4}, /* cost of loading integer registers
482 in QImode, HImode and SImode.
483 Relative to reg-reg move (2). */
484 {2, 2, 2}, /* cost of storing integer registers */
485 2, /* cost of reg,reg fld/fst */
486 {2, 2, 6}, /* cost of loading fp registers
487 in SFmode, DFmode and XFmode */
488 {4, 4, 6}, /* cost of storing fp registers
489 in SFmode, DFmode and XFmode */
490 2, /* cost of moving MMX register */
491 {2, 2}, /* cost of loading MMX registers
492 in SImode and DImode */
493 {2, 2}, /* cost of storing MMX registers
494 in SImode and DImode */
495 2, /* cost of moving SSE register */
496 {2, 2, 8}, /* cost of loading SSE registers
497 in SImode, DImode and TImode */
498 {2, 2, 8}, /* cost of storing SSE registers
499 in SImode, DImode and TImode */
500 3, /* MMX or SSE register to integer */
501 8, /* size of l1 cache. */
502 256, /* size of l2 cache */
503 32, /* size of prefetch block */
504 6, /* number of parallel prefetches */
506 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
507 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
508 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
509 COSTS_N_INSNS (2), /* cost of FABS instruction. */
510 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
511 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
514 1, /* scalar_stmt_cost. */
515 1, /* scalar load_cost. */
516 1, /* scalar_store_cost. */
517 1, /* vec_stmt_cost. */
518 1, /* vec_to_scalar_cost. */
519 1, /* scalar_to_vec_cost. */
520 1, /* vec_align_load_cost. */
521 2, /* vec_unalign_load_cost. */
522 1, /* vec_store_cost. */
523 3, /* cond_taken_branch_cost. */
524 1, /* cond_not_taken_branch_cost. */
/* Geode stringops: rep movsl/stosl up to 256 bytes, libcall beyond.  */
527 static stringop_algs geode_memcpy[2] = {
528 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
529 DUMMY_STRINGOP_ALGS};
530 static stringop_algs geode_memset[2] = {
531 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
532 DUMMY_STRINGOP_ALGS};
534 struct processor_costs geode_cost = {
535 COSTS_N_INSNS (1), /* cost of an add instruction */
536 COSTS_N_INSNS (1), /* cost of a lea instruction */
537 COSTS_N_INSNS (2), /* variable shift costs */
538 COSTS_N_INSNS (1), /* constant shift costs */
539 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
540 COSTS_N_INSNS (4), /* HI */
541 COSTS_N_INSNS (7), /* SI */
542 COSTS_N_INSNS (7), /* DI */
543 COSTS_N_INSNS (7)}, /* other */
544 0, /* cost of multiply per each bit set */
545 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
546 COSTS_N_INSNS (23), /* HI */
547 COSTS_N_INSNS (39), /* SI */
548 COSTS_N_INSNS (39), /* DI */
549 COSTS_N_INSNS (39)}, /* other */
550 COSTS_N_INSNS (1), /* cost of movsx */
551 COSTS_N_INSNS (1), /* cost of movzx */
552 8, /* "large" insn */
554 1, /* cost for loading QImode using movzbl */
555 {1, 1, 1}, /* cost of loading integer registers
556 in QImode, HImode and SImode.
557 Relative to reg-reg move (2). */
558 {1, 1, 1}, /* cost of storing integer registers */
559 1, /* cost of reg,reg fld/fst */
560 {1, 1, 1}, /* cost of loading fp registers
561 in SFmode, DFmode and XFmode */
562 {4, 6, 6}, /* cost of storing fp registers
563 in SFmode, DFmode and XFmode */
565 1, /* cost of moving MMX register */
566 {1, 1}, /* cost of loading MMX registers
567 in SImode and DImode */
568 {1, 1}, /* cost of storing MMX registers
569 in SImode and DImode */
570 1, /* cost of moving SSE register */
571 {1, 1, 1}, /* cost of loading SSE registers
572 in SImode, DImode and TImode */
573 {1, 1, 1}, /* cost of storing SSE registers
574 in SImode, DImode and TImode */
575 1, /* MMX or SSE register to integer */
576 64, /* size of l1 cache. */
577 128, /* size of l2 cache. */
578 32, /* size of prefetch block */
579 1, /* number of parallel prefetches */
581 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
582 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
583 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
584 COSTS_N_INSNS (1), /* cost of FABS instruction. */
585 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
586 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
589 1, /* scalar_stmt_cost. */
590 1, /* scalar load_cost. */
591 1, /* scalar_store_cost. */
592 1, /* vec_stmt_cost. */
593 1, /* vec_to_scalar_cost. */
594 1, /* scalar_to_vec_cost. */
595 1, /* vec_align_load_cost. */
596 2, /* vec_unalign_load_cost. */
597 1, /* vec_store_cost. */
598 3, /* cond_taken_branch_cost. */
599 1, /* cond_not_taken_branch_cost. */
/* K6 stringops: rep movsl/stosl up to 256 bytes, libcall beyond.  */
602 static stringop_algs k6_memcpy[2] = {
603 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
604 DUMMY_STRINGOP_ALGS};
605 static stringop_algs k6_memset[2] = {
606 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
607 DUMMY_STRINGOP_ALGS};
609 struct processor_costs k6_cost = {
610 COSTS_N_INSNS (1), /* cost of an add instruction */
611 COSTS_N_INSNS (2), /* cost of a lea instruction */
612 COSTS_N_INSNS (1), /* variable shift costs */
613 COSTS_N_INSNS (1), /* constant shift costs */
614 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
615 COSTS_N_INSNS (3), /* HI */
616 COSTS_N_INSNS (3), /* SI */
617 COSTS_N_INSNS (3), /* DI */
618 COSTS_N_INSNS (3)}, /* other */
619 0, /* cost of multiply per each bit set */
620 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
621 COSTS_N_INSNS (18), /* HI */
622 COSTS_N_INSNS (18), /* SI */
623 COSTS_N_INSNS (18), /* DI */
624 COSTS_N_INSNS (18)}, /* other */
625 COSTS_N_INSNS (2), /* cost of movsx */
626 COSTS_N_INSNS (2), /* cost of movzx */
627 8, /* "large" insn */
629 3, /* cost for loading QImode using movzbl */
630 {4, 5, 4}, /* cost of loading integer registers
631 in QImode, HImode and SImode.
632 Relative to reg-reg move (2). */
633 {2, 3, 2}, /* cost of storing integer registers */
634 4, /* cost of reg,reg fld/fst */
635 {6, 6, 6}, /* cost of loading fp registers
636 in SFmode, DFmode and XFmode */
637 {4, 4, 4}, /* cost of storing fp registers
638 in SFmode, DFmode and XFmode */
639 2, /* cost of moving MMX register */
640 {2, 2}, /* cost of loading MMX registers
641 in SImode and DImode */
642 {2, 2}, /* cost of storing MMX registers
643 in SImode and DImode */
644 2, /* cost of moving SSE register */
645 {2, 2, 8}, /* cost of loading SSE registers
646 in SImode, DImode and TImode */
647 {2, 2, 8}, /* cost of storing SSE registers
648 in SImode, DImode and TImode */
649 6, /* MMX or SSE register to integer */
650 32, /* size of l1 cache. */
651 32, /* size of l2 cache. Some models
652 have integrated l2 cache, but
653 optimizing for k6 is not important
654 enough to worry about that. */
655 32, /* size of prefetch block */
656 1, /* number of parallel prefetches */
658 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
659 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
660 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
661 COSTS_N_INSNS (2), /* cost of FABS instruction. */
662 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
663 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
666 1, /* scalar_stmt_cost. */
667 1, /* scalar load_cost. */
668 1, /* scalar_store_cost. */
669 1, /* vec_stmt_cost. */
670 1, /* vec_to_scalar_cost. */
671 1, /* scalar_to_vec_cost. */
672 1, /* vec_align_load_cost. */
673 2, /* vec_unalign_load_cost. */
674 1, /* vec_store_cost. */
675 3, /* cond_taken_branch_cost. */
676 1, /* cond_not_taken_branch_cost. */
679 /* For some reason, Athlon deals better with REP prefix (relative to loops)
680 compared to K8. Alignment becomes important after 8 bytes for memcpy and
681 128 bytes for memset. */
682 static stringop_algs athlon_memcpy[2] = {
683 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
684 DUMMY_STRINGOP_ALGS};
685 static stringop_algs athlon_memset[2] = {
686 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
687 DUMMY_STRINGOP_ALGS};
689 struct processor_costs athlon_cost = {
690 COSTS_N_INSNS (1), /* cost of an add instruction */
691 COSTS_N_INSNS (2), /* cost of a lea instruction */
692 COSTS_N_INSNS (1), /* variable shift costs */
693 COSTS_N_INSNS (1), /* constant shift costs */
694 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
695 COSTS_N_INSNS (5), /* HI */
696 COSTS_N_INSNS (5), /* SI */
697 COSTS_N_INSNS (5), /* DI */
698 COSTS_N_INSNS (5)}, /* other */
699 0, /* cost of multiply per each bit set */
700 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
701 COSTS_N_INSNS (26), /* HI */
702 COSTS_N_INSNS (42), /* SI */
703 COSTS_N_INSNS (74), /* DI */
704 COSTS_N_INSNS (74)}, /* other */
705 COSTS_N_INSNS (1), /* cost of movsx */
706 COSTS_N_INSNS (1), /* cost of movzx */
707 8, /* "large" insn */
709 4, /* cost for loading QImode using movzbl */
710 {3, 4, 3}, /* cost of loading integer registers
711 in QImode, HImode and SImode.
712 Relative to reg-reg move (2). */
713 {3, 4, 3}, /* cost of storing integer registers */
714 4, /* cost of reg,reg fld/fst */
715 {4, 4, 12}, /* cost of loading fp registers
716 in SFmode, DFmode and XFmode */
717 {6, 6, 8}, /* cost of storing fp registers
718 in SFmode, DFmode and XFmode */
719 2, /* cost of moving MMX register */
720 {4, 4}, /* cost of loading MMX registers
721 in SImode and DImode */
722 {4, 4}, /* cost of storing MMX registers
723 in SImode and DImode */
724 2, /* cost of moving SSE register */
725 {4, 4, 6}, /* cost of loading SSE registers
726 in SImode, DImode and TImode */
727 {4, 4, 5}, /* cost of storing SSE registers
728 in SImode, DImode and TImode */
729 5, /* MMX or SSE register to integer */
730 64, /* size of l1 cache. */
731 256, /* size of l2 cache. */
732 64, /* size of prefetch block */
733 6, /* number of parallel prefetches */
735 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
736 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
737 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
738 COSTS_N_INSNS (2), /* cost of FABS instruction. */
739 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
740 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
743 1, /* scalar_stmt_cost. */
744 1, /* scalar load_cost. */
745 1, /* scalar_store_cost. */
746 1, /* vec_stmt_cost. */
747 1, /* vec_to_scalar_cost. */
748 1, /* scalar_to_vec_cost. */
749 1, /* vec_align_load_cost. */
750 2, /* vec_unalign_load_cost. */
751 1, /* vec_store_cost. */
752 3, /* cond_taken_branch_cost. */
753 1, /* cond_not_taken_branch_cost. */
756 /* K8 has optimized REP instruction for medium sized blocks, but for very
757 small blocks it is better to use loop. For large blocks, libcall can
758 do nontemporary accesses and beat inline considerably. */
759 static stringop_algs k8_memcpy[2] = {
760 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
761 {-1, rep_prefix_4_byte, false}}},
762 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
763 {-1, libcall, false}}}};
764 static stringop_algs k8_memset[2] = {
765 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
766 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
767 {libcall, {{48, unrolled_loop, false},
768 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
770 struct processor_costs k8_cost = {
771 COSTS_N_INSNS (1), /* cost of an add instruction */
772 COSTS_N_INSNS (2), /* cost of a lea instruction */
773 COSTS_N_INSNS (1), /* variable shift costs */
774 COSTS_N_INSNS (1), /* constant shift costs */
775 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
776 COSTS_N_INSNS (4), /* HI */
777 COSTS_N_INSNS (3), /* SI */
778 COSTS_N_INSNS (4), /* DI */
779 COSTS_N_INSNS (5)}, /* other */
780 0, /* cost of multiply per each bit set */
781 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
782 COSTS_N_INSNS (26), /* HI */
783 COSTS_N_INSNS (42), /* SI */
784 COSTS_N_INSNS (74), /* DI */
785 COSTS_N_INSNS (74)}, /* other */
786 COSTS_N_INSNS (1), /* cost of movsx */
787 COSTS_N_INSNS (1), /* cost of movzx */
788 8, /* "large" insn */
790 4, /* cost for loading QImode using movzbl */
791 {3, 4, 3}, /* cost of loading integer registers
792 in QImode, HImode and SImode.
793 Relative to reg-reg move (2). */
794 {3, 4, 3}, /* cost of storing integer registers */
795 4, /* cost of reg,reg fld/fst */
796 {4, 4, 12}, /* cost of loading fp registers
797 in SFmode, DFmode and XFmode */
798 {6, 6, 8}, /* cost of storing fp registers
799 in SFmode, DFmode and XFmode */
800 2, /* cost of moving MMX register */
801 {3, 3}, /* cost of loading MMX registers
802 in SImode and DImode */
803 {4, 4}, /* cost of storing MMX registers
804 in SImode and DImode */
805 2, /* cost of moving SSE register */
806 {4, 3, 6}, /* cost of loading SSE registers
807 in SImode, DImode and TImode */
808 {4, 4, 5}, /* cost of storing SSE registers
809 in SImode, DImode and TImode */
810 5, /* MMX or SSE register to integer */
811 64, /* size of l1 cache. */
812 512, /* size of l2 cache. */
813 64, /* size of prefetch block */
814 /* New AMD processors never drop prefetches; if they cannot be performed
815 immediately, they are queued. We set number of simultaneous prefetches
816 to a large constant to reflect this (it probably is not a good idea not
817 to limit number of prefetches at all, as their execution also takes some
819 100, /* number of parallel prefetches */
821 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
822 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
823 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
824 COSTS_N_INSNS (2), /* cost of FABS instruction. */
825 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
826 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
830 4, /* scalar_stmt_cost. */
831 2, /* scalar load_cost. */
832 2, /* scalar_store_cost. */
833 5, /* vec_stmt_cost. */
834 0, /* vec_to_scalar_cost. */
835 2, /* scalar_to_vec_cost. */
836 2, /* vec_align_load_cost. */
837 3, /* vec_unalign_load_cost. */
838 3, /* vec_store_cost. */
839 3, /* cond_taken_branch_cost. */
840 2, /* cond_not_taken_branch_cost. */
843 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
844 very small blocks it is better to use loop. For large blocks, libcall can
845 do nontemporary accesses and beat inline considerably. */
846 static stringop_algs amdfam10_memcpy[2] = {
847 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
848 {-1, rep_prefix_4_byte, false}}},
849 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
850 {-1, libcall, false}}}};
851 static stringop_algs amdfam10_memset[2] = {
852 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
853 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
854 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
855 {-1, libcall, false}}}};
856 struct processor_costs amdfam10_cost = {
857 COSTS_N_INSNS (1), /* cost of an add instruction */
858 COSTS_N_INSNS (2), /* cost of a lea instruction */
859 COSTS_N_INSNS (1), /* variable shift costs */
860 COSTS_N_INSNS (1), /* constant shift costs */
861 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
862 COSTS_N_INSNS (4), /* HI */
863 COSTS_N_INSNS (3), /* SI */
864 COSTS_N_INSNS (4), /* DI */
865 COSTS_N_INSNS (5)}, /* other */
866 0, /* cost of multiply per each bit set */
867 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
868 COSTS_N_INSNS (35), /* HI */
869 COSTS_N_INSNS (51), /* SI */
870 COSTS_N_INSNS (83), /* DI */
871 COSTS_N_INSNS (83)}, /* other */
872 COSTS_N_INSNS (1), /* cost of movsx */
873 COSTS_N_INSNS (1), /* cost of movzx */
874 8, /* "large" insn */
876 4, /* cost for loading QImode using movzbl */
877 {3, 4, 3}, /* cost of loading integer registers
878 in QImode, HImode and SImode.
879 Relative to reg-reg move (2). */
880 {3, 4, 3}, /* cost of storing integer registers */
881 4, /* cost of reg,reg fld/fst */
882 {4, 4, 12}, /* cost of loading fp registers
883 in SFmode, DFmode and XFmode */
884 {6, 6, 8}, /* cost of storing fp registers
885 in SFmode, DFmode and XFmode */
886 2, /* cost of moving MMX register */
887 {3, 3}, /* cost of loading MMX registers
888 in SImode and DImode */
889 {4, 4}, /* cost of storing MMX registers
890 in SImode and DImode */
891 2, /* cost of moving SSE register */
892 {4, 4, 3}, /* cost of loading SSE registers
893 in SImode, DImode and TImode */
894 {4, 4, 5}, /* cost of storing SSE registers
895 in SImode, DImode and TImode */
896 3, /* MMX or SSE register to integer */
/* NOTE(review): the lines below are a fragment of a latency table
   (MOVD/FSTORE/FADD) whose surrounding comment delimiters were elided
   from this excerpt — restore from the full file before editing.  */
898 MOVD reg64, xmmreg Double FSTORE 4
899 MOVD reg32, xmmreg Double FSTORE 4
901 MOVD reg64, xmmreg Double FADD 3
903 MOVD reg32, xmmreg Double FADD 3
905 64, /* size of l1 cache. */
906 512, /* size of l2 cache. */
907 64, /* size of prefetch block */
908 /* New AMD processors never drop prefetches; if they cannot be performed
909 immediately, they are queued. We set number of simultaneous prefetches
910 to a large constant to reflect this (it probably is not a good idea not
911 to limit number of prefetches at all, as their execution also takes some
913 100, /* number of parallel prefetches */
915 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
916 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
917 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
918 COSTS_N_INSNS (2), /* cost of FABS instruction. */
919 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
920 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
924 4, /* scalar_stmt_cost. */
925 2, /* scalar load_cost. */
926 2, /* scalar_store_cost. */
927 6, /* vec_stmt_cost. */
928 0, /* vec_to_scalar_cost. */
929 2, /* scalar_to_vec_cost. */
930 2, /* vec_align_load_cost. */
931 2, /* vec_unalign_load_cost. */
932 2, /* vec_store_cost. */
933 2, /* cond_taken_branch_cost. */
934 1, /* cond_not_taken_branch_cost. */
937 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
938 very small blocks it is better to use loop. For large blocks, libcall
939 can do nontemporary accesses and beat inline considerably. */
940 static stringop_algs bdver1_memcpy[2] = {
941 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
942 {-1, rep_prefix_4_byte, false}}},
943 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
944 {-1, libcall, false}}}};
945 static stringop_algs bdver1_memset[2] = {
946 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
947 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
948 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
949 {-1, libcall, false}}}};
951 const struct processor_costs bdver1_cost = {
952 COSTS_N_INSNS (1), /* cost of an add instruction */
953 COSTS_N_INSNS (1), /* cost of a lea instruction */
954 COSTS_N_INSNS (1), /* variable shift costs */
955 COSTS_N_INSNS (1), /* constant shift costs */
956 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
957 COSTS_N_INSNS (4), /* HI */
958 COSTS_N_INSNS (4), /* SI */
959 COSTS_N_INSNS (6), /* DI */
960 COSTS_N_INSNS (6)}, /* other */
961 0, /* cost of multiply per each bit set */
962 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
963 COSTS_N_INSNS (35), /* HI */
964 COSTS_N_INSNS (51), /* SI */
965 COSTS_N_INSNS (83), /* DI */
966 COSTS_N_INSNS (83)}, /* other */
967 COSTS_N_INSNS (1), /* cost of movsx */
968 COSTS_N_INSNS (1), /* cost of movzx */
969 8, /* "large" insn */
971 4, /* cost for loading QImode using movzbl */
972 {5, 5, 4}, /* cost of loading integer registers
973 in QImode, HImode and SImode.
974 Relative to reg-reg move (2). */
975 {4, 4, 4}, /* cost of storing integer registers */
976 2, /* cost of reg,reg fld/fst */
977 {5, 5, 12}, /* cost of loading fp registers
978 in SFmode, DFmode and XFmode */
979 {4, 4, 8}, /* cost of storing fp registers
980 in SFmode, DFmode and XFmode */
981 2, /* cost of moving MMX register */
982 {4, 4}, /* cost of loading MMX registers
983 in SImode and DImode */
984 {4, 4}, /* cost of storing MMX registers
985 in SImode and DImode */
986 2, /* cost of moving SSE register */
987 {4, 4, 4}, /* cost of loading SSE registers
988 in SImode, DImode and TImode */
989 {4, 4, 4}, /* cost of storing SSE registers
990 in SImode, DImode and TImode */
991 2, /* MMX or SSE register to integer */
993 MOVD reg64, xmmreg Double FSTORE 4
994 MOVD reg32, xmmreg Double FSTORE 4
996 MOVD reg64, xmmreg Double FADD 3
998 MOVD reg32, xmmreg Double FADD 3
1000 16, /* size of l1 cache. */
1001 2048, /* size of l2 cache. */
1002 64, /* size of prefetch block */
1003 /* New AMD processors never drop prefetches; if they cannot be performed
1004 immediately, they are queued. We set number of simultaneous prefetches
1005 to a large constant to reflect this (it probably is not a good idea not
1006 to limit number of prefetches at all, as their execution also takes some
1008 100, /* number of parallel prefetches */
1009 2, /* Branch cost */
1010 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1011 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1012 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1013 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1014 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1015 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1019 6, /* scalar_stmt_cost. */
1020 4, /* scalar load_cost. */
1021 4, /* scalar_store_cost. */
1022 6, /* vec_stmt_cost. */
1023 0, /* vec_to_scalar_cost. */
1024 2, /* scalar_to_vec_cost. */
1025 4, /* vec_align_load_cost. */
1026 4, /* vec_unalign_load_cost. */
1027 4, /* vec_store_cost. */
1028 2, /* cond_taken_branch_cost. */
1029 1, /* cond_not_taken_branch_cost. */
1032 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
1033 very small blocks it is better to use loop. For large blocks, libcall
1034 can do nontemporary accesses and beat inline considerably. */
1036 static stringop_algs bdver2_memcpy[2] = {
1037 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1038 {-1, rep_prefix_4_byte, false}}},
1039 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1040 {-1, libcall, false}}}};
1041 static stringop_algs bdver2_memset[2] = {
1042 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1043 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1044 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1045 {-1, libcall, false}}}};
1047 const struct processor_costs bdver2_cost = {
1048 COSTS_N_INSNS (1), /* cost of an add instruction */
1049 COSTS_N_INSNS (1), /* cost of a lea instruction */
1050 COSTS_N_INSNS (1), /* variable shift costs */
1051 COSTS_N_INSNS (1), /* constant shift costs */
1052 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1053 COSTS_N_INSNS (4), /* HI */
1054 COSTS_N_INSNS (4), /* SI */
1055 COSTS_N_INSNS (6), /* DI */
1056 COSTS_N_INSNS (6)}, /* other */
1057 0, /* cost of multiply per each bit set */
1058 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1059 COSTS_N_INSNS (35), /* HI */
1060 COSTS_N_INSNS (51), /* SI */
1061 COSTS_N_INSNS (83), /* DI */
1062 COSTS_N_INSNS (83)}, /* other */
1063 COSTS_N_INSNS (1), /* cost of movsx */
1064 COSTS_N_INSNS (1), /* cost of movzx */
1065 8, /* "large" insn */
1067 4, /* cost for loading QImode using movzbl */
1068 {5, 5, 4}, /* cost of loading integer registers
1069 in QImode, HImode and SImode.
1070 Relative to reg-reg move (2). */
1071 {4, 4, 4}, /* cost of storing integer registers */
1072 2, /* cost of reg,reg fld/fst */
1073 {5, 5, 12}, /* cost of loading fp registers
1074 in SFmode, DFmode and XFmode */
1075 {4, 4, 8}, /* cost of storing fp registers
1076 in SFmode, DFmode and XFmode */
1077 2, /* cost of moving MMX register */
1078 {4, 4}, /* cost of loading MMX registers
1079 in SImode and DImode */
1080 {4, 4}, /* cost of storing MMX registers
1081 in SImode and DImode */
1082 2, /* cost of moving SSE register */
1083 {4, 4, 4}, /* cost of loading SSE registers
1084 in SImode, DImode and TImode */
1085 {4, 4, 4}, /* cost of storing SSE registers
1086 in SImode, DImode and TImode */
1087 2, /* MMX or SSE register to integer */
1089 MOVD reg64, xmmreg Double FSTORE 4
1090 MOVD reg32, xmmreg Double FSTORE 4
1092 MOVD reg64, xmmreg Double FADD 3
1094 MOVD reg32, xmmreg Double FADD 3
1096 16, /* size of l1 cache. */
1097 2048, /* size of l2 cache. */
1098 64, /* size of prefetch block */
1099 /* New AMD processors never drop prefetches; if they cannot be performed
1100 immediately, they are queued. We set number of simultaneous prefetches
1101 to a large constant to reflect this (it probably is not a good idea not
1102 to limit number of prefetches at all, as their execution also takes some
1104 100, /* number of parallel prefetches */
1105 2, /* Branch cost */
1106 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1107 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1108 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1109 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1110 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1111 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1115 6, /* scalar_stmt_cost. */
1116 4, /* scalar load_cost. */
1117 4, /* scalar_store_cost. */
1118 6, /* vec_stmt_cost. */
1119 0, /* vec_to_scalar_cost. */
1120 2, /* scalar_to_vec_cost. */
1121 4, /* vec_align_load_cost. */
1122 4, /* vec_unalign_load_cost. */
1123 4, /* vec_store_cost. */
1124 2, /* cond_taken_branch_cost. */
1125 1, /* cond_not_taken_branch_cost. */
1129 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1130 very small blocks it is better to use loop. For large blocks, libcall
1131 can do nontemporary accesses and beat inline considerably. */
1132 static stringop_algs bdver3_memcpy[2] = {
1133 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1134 {-1, rep_prefix_4_byte, false}}},
1135 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1136 {-1, libcall, false}}}};
1137 static stringop_algs bdver3_memset[2] = {
1138 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1139 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1140 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1141 {-1, libcall, false}}}};
1142 struct processor_costs bdver3_cost = {
1143 COSTS_N_INSNS (1), /* cost of an add instruction */
1144 COSTS_N_INSNS (1), /* cost of a lea instruction */
1145 COSTS_N_INSNS (1), /* variable shift costs */
1146 COSTS_N_INSNS (1), /* constant shift costs */
1147 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1148 COSTS_N_INSNS (4), /* HI */
1149 COSTS_N_INSNS (4), /* SI */
1150 COSTS_N_INSNS (6), /* DI */
1151 COSTS_N_INSNS (6)}, /* other */
1152 0, /* cost of multiply per each bit set */
1153 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1154 COSTS_N_INSNS (35), /* HI */
1155 COSTS_N_INSNS (51), /* SI */
1156 COSTS_N_INSNS (83), /* DI */
1157 COSTS_N_INSNS (83)}, /* other */
1158 COSTS_N_INSNS (1), /* cost of movsx */
1159 COSTS_N_INSNS (1), /* cost of movzx */
1160 8, /* "large" insn */
1162 4, /* cost for loading QImode using movzbl */
1163 {5, 5, 4}, /* cost of loading integer registers
1164 in QImode, HImode and SImode.
1165 Relative to reg-reg move (2). */
1166 {4, 4, 4}, /* cost of storing integer registers */
1167 2, /* cost of reg,reg fld/fst */
1168 {5, 5, 12}, /* cost of loading fp registers
1169 in SFmode, DFmode and XFmode */
1170 {4, 4, 8}, /* cost of storing fp registers
1171 in SFmode, DFmode and XFmode */
1172 2, /* cost of moving MMX register */
1173 {4, 4}, /* cost of loading MMX registers
1174 in SImode and DImode */
1175 {4, 4}, /* cost of storing MMX registers
1176 in SImode and DImode */
1177 2, /* cost of moving SSE register */
1178 {4, 4, 4}, /* cost of loading SSE registers
1179 in SImode, DImode and TImode */
1180 {4, 4, 4}, /* cost of storing SSE registers
1181 in SImode, DImode and TImode */
1182 2, /* MMX or SSE register to integer */
1183 16, /* size of l1 cache. */
1184 2048, /* size of l2 cache. */
1185 64, /* size of prefetch block */
1186 /* New AMD processors never drop prefetches; if they cannot be performed
1187 immediately, they are queued. We set number of simultaneous prefetches
1188 to a large constant to reflect this (it probably is not a good idea not
1189 to limit number of prefetches at all, as their execution also takes some
1191 100, /* number of parallel prefetches */
1192 2, /* Branch cost */
1193 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1194 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1195 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1196 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1197 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1198 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1202 6, /* scalar_stmt_cost. */
1203 4, /* scalar load_cost. */
1204 4, /* scalar_store_cost. */
1205 6, /* vec_stmt_cost. */
1206 0, /* vec_to_scalar_cost. */
1207 2, /* scalar_to_vec_cost. */
1208 4, /* vec_align_load_cost. */
1209 4, /* vec_unalign_load_cost. */
1210 4, /* vec_store_cost. */
1211 2, /* cond_taken_branch_cost. */
1212 1, /* cond_not_taken_branch_cost. */
1215 /* BDVER4 has optimized REP instruction for medium sized blocks, but for
1216 very small blocks it is better to use loop. For large blocks, libcall
1217 can do nontemporary accesses and beat inline considerably. */
1218 static stringop_algs bdver4_memcpy[2] = {
1219 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1220 {-1, rep_prefix_4_byte, false}}},
1221 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1222 {-1, libcall, false}}}};
1223 static stringop_algs bdver4_memset[2] = {
1224 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1225 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1226 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1227 {-1, libcall, false}}}};
1228 struct processor_costs bdver4_cost = {
1229 COSTS_N_INSNS (1), /* cost of an add instruction */
1230 COSTS_N_INSNS (1), /* cost of a lea instruction */
1231 COSTS_N_INSNS (1), /* variable shift costs */
1232 COSTS_N_INSNS (1), /* constant shift costs */
1233 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1234 COSTS_N_INSNS (4), /* HI */
1235 COSTS_N_INSNS (4), /* SI */
1236 COSTS_N_INSNS (6), /* DI */
1237 COSTS_N_INSNS (6)}, /* other */
1238 0, /* cost of multiply per each bit set */
1239 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1240 COSTS_N_INSNS (35), /* HI */
1241 COSTS_N_INSNS (51), /* SI */
1242 COSTS_N_INSNS (83), /* DI */
1243 COSTS_N_INSNS (83)}, /* other */
1244 COSTS_N_INSNS (1), /* cost of movsx */
1245 COSTS_N_INSNS (1), /* cost of movzx */
1246 8, /* "large" insn */
1248 4, /* cost for loading QImode using movzbl */
1249 {5, 5, 4}, /* cost of loading integer registers
1250 in QImode, HImode and SImode.
1251 Relative to reg-reg move (2). */
1252 {4, 4, 4}, /* cost of storing integer registers */
1253 2, /* cost of reg,reg fld/fst */
1254 {5, 5, 12}, /* cost of loading fp registers
1255 in SFmode, DFmode and XFmode */
1256 {4, 4, 8}, /* cost of storing fp registers
1257 in SFmode, DFmode and XFmode */
1258 2, /* cost of moving MMX register */
1259 {4, 4}, /* cost of loading MMX registers
1260 in SImode and DImode */
1261 {4, 4}, /* cost of storing MMX registers
1262 in SImode and DImode */
1263 2, /* cost of moving SSE register */
1264 {4, 4, 4}, /* cost of loading SSE registers
1265 in SImode, DImode and TImode */
1266 {4, 4, 4}, /* cost of storing SSE registers
1267 in SImode, DImode and TImode */
1268 2, /* MMX or SSE register to integer */
1269 16, /* size of l1 cache. */
1270 2048, /* size of l2 cache. */
1271 64, /* size of prefetch block */
1272 /* New AMD processors never drop prefetches; if they cannot be performed
1273 immediately, they are queued. We set number of simultaneous prefetches
1274 to a large constant to reflect this (it probably is not a good idea not
1275 to limit number of prefetches at all, as their execution also takes some
1277 100, /* number of parallel prefetches */
1278 2, /* Branch cost */
1279 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1280 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1281 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1282 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1283 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1284 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1288 6, /* scalar_stmt_cost. */
1289 4, /* scalar load_cost. */
1290 4, /* scalar_store_cost. */
1291 6, /* vec_stmt_cost. */
1292 0, /* vec_to_scalar_cost. */
1293 2, /* scalar_to_vec_cost. */
1294 4, /* vec_align_load_cost. */
1295 4, /* vec_unalign_load_cost. */
1296 4, /* vec_store_cost. */
1297 2, /* cond_taken_branch_cost. */
1298 1, /* cond_not_taken_branch_cost. */
1301 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1302 very small blocks it is better to use loop. For large blocks, libcall can
1303 do nontemporary accesses and beat inline considerably. */
1304 static stringop_algs btver1_memcpy[2] = {
1305 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1306 {-1, rep_prefix_4_byte, false}}},
1307 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1308 {-1, libcall, false}}}};
1309 static stringop_algs btver1_memset[2] = {
1310 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1311 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1312 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1313 {-1, libcall, false}}}};
1314 const struct processor_costs btver1_cost = {
1315 COSTS_N_INSNS (1), /* cost of an add instruction */
1316 COSTS_N_INSNS (2), /* cost of a lea instruction */
1317 COSTS_N_INSNS (1), /* variable shift costs */
1318 COSTS_N_INSNS (1), /* constant shift costs */
1319 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1320 COSTS_N_INSNS (4), /* HI */
1321 COSTS_N_INSNS (3), /* SI */
1322 COSTS_N_INSNS (4), /* DI */
1323 COSTS_N_INSNS (5)}, /* other */
1324 0, /* cost of multiply per each bit set */
1325 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1326 COSTS_N_INSNS (35), /* HI */
1327 COSTS_N_INSNS (51), /* SI */
1328 COSTS_N_INSNS (83), /* DI */
1329 COSTS_N_INSNS (83)}, /* other */
1330 COSTS_N_INSNS (1), /* cost of movsx */
1331 COSTS_N_INSNS (1), /* cost of movzx */
1332 8, /* "large" insn */
1334 4, /* cost for loading QImode using movzbl */
1335 {3, 4, 3}, /* cost of loading integer registers
1336 in QImode, HImode and SImode.
1337 Relative to reg-reg move (2). */
1338 {3, 4, 3}, /* cost of storing integer registers */
1339 4, /* cost of reg,reg fld/fst */
1340 {4, 4, 12}, /* cost of loading fp registers
1341 in SFmode, DFmode and XFmode */
1342 {6, 6, 8}, /* cost of storing fp registers
1343 in SFmode, DFmode and XFmode */
1344 2, /* cost of moving MMX register */
1345 {3, 3}, /* cost of loading MMX registers
1346 in SImode and DImode */
1347 {4, 4}, /* cost of storing MMX registers
1348 in SImode and DImode */
1349 2, /* cost of moving SSE register */
1350 {4, 4, 3}, /* cost of loading SSE registers
1351 in SImode, DImode and TImode */
1352 {4, 4, 5}, /* cost of storing SSE registers
1353 in SImode, DImode and TImode */
1354 3, /* MMX or SSE register to integer */
1356 MOVD reg64, xmmreg Double FSTORE 4
1357 MOVD reg32, xmmreg Double FSTORE 4
1359 MOVD reg64, xmmreg Double FADD 3
1361 MOVD reg32, xmmreg Double FADD 3
1363 32, /* size of l1 cache. */
1364 512, /* size of l2 cache. */
1365 64, /* size of prefetch block */
1366 100, /* number of parallel prefetches */
1367 2, /* Branch cost */
1368 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1369 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1370 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1371 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1372 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1373 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1377 4, /* scalar_stmt_cost. */
1378 2, /* scalar load_cost. */
1379 2, /* scalar_store_cost. */
1380 6, /* vec_stmt_cost. */
1381 0, /* vec_to_scalar_cost. */
1382 2, /* scalar_to_vec_cost. */
1383 2, /* vec_align_load_cost. */
1384 2, /* vec_unalign_load_cost. */
1385 2, /* vec_store_cost. */
1386 2, /* cond_taken_branch_cost. */
1387 1, /* cond_not_taken_branch_cost. */
1390 static stringop_algs btver2_memcpy[2] = {
1391 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1392 {-1, rep_prefix_4_byte, false}}},
1393 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1394 {-1, libcall, false}}}};
1395 static stringop_algs btver2_memset[2] = {
1396 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1397 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1398 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1399 {-1, libcall, false}}}};
1400 const struct processor_costs btver2_cost = {
1401 COSTS_N_INSNS (1), /* cost of an add instruction */
1402 COSTS_N_INSNS (2), /* cost of a lea instruction */
1403 COSTS_N_INSNS (1), /* variable shift costs */
1404 COSTS_N_INSNS (1), /* constant shift costs */
1405 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1406 COSTS_N_INSNS (4), /* HI */
1407 COSTS_N_INSNS (3), /* SI */
1408 COSTS_N_INSNS (4), /* DI */
1409 COSTS_N_INSNS (5)}, /* other */
1410 0, /* cost of multiply per each bit set */
1411 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1412 COSTS_N_INSNS (35), /* HI */
1413 COSTS_N_INSNS (51), /* SI */
1414 COSTS_N_INSNS (83), /* DI */
1415 COSTS_N_INSNS (83)}, /* other */
1416 COSTS_N_INSNS (1), /* cost of movsx */
1417 COSTS_N_INSNS (1), /* cost of movzx */
1418 8, /* "large" insn */
1420 4, /* cost for loading QImode using movzbl */
1421 {3, 4, 3}, /* cost of loading integer registers
1422 in QImode, HImode and SImode.
1423 Relative to reg-reg move (2). */
1424 {3, 4, 3}, /* cost of storing integer registers */
1425 4, /* cost of reg,reg fld/fst */
1426 {4, 4, 12}, /* cost of loading fp registers
1427 in SFmode, DFmode and XFmode */
1428 {6, 6, 8}, /* cost of storing fp registers
1429 in SFmode, DFmode and XFmode */
1430 2, /* cost of moving MMX register */
1431 {3, 3}, /* cost of loading MMX registers
1432 in SImode and DImode */
1433 {4, 4}, /* cost of storing MMX registers
1434 in SImode and DImode */
1435 2, /* cost of moving SSE register */
1436 {4, 4, 3}, /* cost of loading SSE registers
1437 in SImode, DImode and TImode */
1438 {4, 4, 5}, /* cost of storing SSE registers
1439 in SImode, DImode and TImode */
1440 3, /* MMX or SSE register to integer */
1442 MOVD reg64, xmmreg Double FSTORE 4
1443 MOVD reg32, xmmreg Double FSTORE 4
1445 MOVD reg64, xmmreg Double FADD 3
1447 MOVD reg32, xmmreg Double FADD 3
1449 32, /* size of l1 cache. */
1450 2048, /* size of l2 cache. */
1451 64, /* size of prefetch block */
1452 100, /* number of parallel prefetches */
1453 2, /* Branch cost */
1454 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1455 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1456 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1457 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1458 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1459 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1462 4, /* scalar_stmt_cost. */
1463 2, /* scalar load_cost. */
1464 2, /* scalar_store_cost. */
1465 6, /* vec_stmt_cost. */
1466 0, /* vec_to_scalar_cost. */
1467 2, /* scalar_to_vec_cost. */
1468 2, /* vec_align_load_cost. */
1469 2, /* vec_unalign_load_cost. */
1470 2, /* vec_store_cost. */
1471 2, /* cond_taken_branch_cost. */
1472 1, /* cond_not_taken_branch_cost. */
1475 static stringop_algs pentium4_memcpy[2] = {
1476 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1477 DUMMY_STRINGOP_ALGS};
1478 static stringop_algs pentium4_memset[2] = {
1479 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1480 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1481 DUMMY_STRINGOP_ALGS};
1484 struct processor_costs pentium4_cost = {
1485 COSTS_N_INSNS (1), /* cost of an add instruction */
1486 COSTS_N_INSNS (3), /* cost of a lea instruction */
1487 COSTS_N_INSNS (4), /* variable shift costs */
1488 COSTS_N_INSNS (4), /* constant shift costs */
1489 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1490 COSTS_N_INSNS (15), /* HI */
1491 COSTS_N_INSNS (15), /* SI */
1492 COSTS_N_INSNS (15), /* DI */
1493 COSTS_N_INSNS (15)}, /* other */
1494 0, /* cost of multiply per each bit set */
1495 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1496 COSTS_N_INSNS (56), /* HI */
1497 COSTS_N_INSNS (56), /* SI */
1498 COSTS_N_INSNS (56), /* DI */
1499 COSTS_N_INSNS (56)}, /* other */
1500 COSTS_N_INSNS (1), /* cost of movsx */
1501 COSTS_N_INSNS (1), /* cost of movzx */
1502 16, /* "large" insn */
1504 2, /* cost for loading QImode using movzbl */
1505 {4, 5, 4}, /* cost of loading integer registers
1506 in QImode, HImode and SImode.
1507 Relative to reg-reg move (2). */
1508 {2, 3, 2}, /* cost of storing integer registers */
1509 2, /* cost of reg,reg fld/fst */
1510 {2, 2, 6}, /* cost of loading fp registers
1511 in SFmode, DFmode and XFmode */
1512 {4, 4, 6}, /* cost of storing fp registers
1513 in SFmode, DFmode and XFmode */
1514 2, /* cost of moving MMX register */
1515 {2, 2}, /* cost of loading MMX registers
1516 in SImode and DImode */
1517 {2, 2}, /* cost of storing MMX registers
1518 in SImode and DImode */
1519 12, /* cost of moving SSE register */
1520 {12, 12, 12}, /* cost of loading SSE registers
1521 in SImode, DImode and TImode */
1522 {2, 2, 8}, /* cost of storing SSE registers
1523 in SImode, DImode and TImode */
1524 10, /* MMX or SSE register to integer */
1525 8, /* size of l1 cache. */
1526 256, /* size of l2 cache. */
1527 64, /* size of prefetch block */
1528 6, /* number of parallel prefetches */
1529 2, /* Branch cost */
1530 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1531 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1532 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1533 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1534 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1535 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1538 1, /* scalar_stmt_cost. */
1539 1, /* scalar load_cost. */
1540 1, /* scalar_store_cost. */
1541 1, /* vec_stmt_cost. */
1542 1, /* vec_to_scalar_cost. */
1543 1, /* scalar_to_vec_cost. */
1544 1, /* vec_align_load_cost. */
1545 2, /* vec_unalign_load_cost. */
1546 1, /* vec_store_cost. */
1547 3, /* cond_taken_branch_cost. */
1548 1, /* cond_not_taken_branch_cost. */
1551 static stringop_algs nocona_memcpy[2] = {
1552 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1553 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1554 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1556 static stringop_algs nocona_memset[2] = {
1557 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1558 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1559 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1560 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1563 struct processor_costs nocona_cost = {
1564 COSTS_N_INSNS (1), /* cost of an add instruction */
1565 COSTS_N_INSNS (1), /* cost of a lea instruction */
1566 COSTS_N_INSNS (1), /* variable shift costs */
1567 COSTS_N_INSNS (1), /* constant shift costs */
1568 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1569 COSTS_N_INSNS (10), /* HI */
1570 COSTS_N_INSNS (10), /* SI */
1571 COSTS_N_INSNS (10), /* DI */
1572 COSTS_N_INSNS (10)}, /* other */
1573 0, /* cost of multiply per each bit set */
1574 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1575 COSTS_N_INSNS (66), /* HI */
1576 COSTS_N_INSNS (66), /* SI */
1577 COSTS_N_INSNS (66), /* DI */
1578 COSTS_N_INSNS (66)}, /* other */
1579 COSTS_N_INSNS (1), /* cost of movsx */
1580 COSTS_N_INSNS (1), /* cost of movzx */
1581 16, /* "large" insn */
1582 17, /* MOVE_RATIO */
1583 4, /* cost for loading QImode using movzbl */
1584 {4, 4, 4}, /* cost of loading integer registers
1585 in QImode, HImode and SImode.
1586 Relative to reg-reg move (2). */
1587 {4, 4, 4}, /* cost of storing integer registers */
1588 3, /* cost of reg,reg fld/fst */
1589 {12, 12, 12}, /* cost of loading fp registers
1590 in SFmode, DFmode and XFmode */
1591 {4, 4, 4}, /* cost of storing fp registers
1592 in SFmode, DFmode and XFmode */
1593 6, /* cost of moving MMX register */
1594 {12, 12}, /* cost of loading MMX registers
1595 in SImode and DImode */
1596 {12, 12}, /* cost of storing MMX registers
1597 in SImode and DImode */
1598 6, /* cost of moving SSE register */
1599 {12, 12, 12}, /* cost of loading SSE registers
1600 in SImode, DImode and TImode */
1601 {12, 12, 12}, /* cost of storing SSE registers
1602 in SImode, DImode and TImode */
1603 8, /* MMX or SSE register to integer */
1604 8, /* size of l1 cache. */
1605 1024, /* size of l2 cache. */
1606 64, /* size of prefetch block */
1607 8, /* number of parallel prefetches */
1608 1, /* Branch cost */
1609 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1610 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1611 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1612 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1613 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1614 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1617 1, /* scalar_stmt_cost. */
1618 1, /* scalar load_cost. */
1619 1, /* scalar_store_cost. */
1620 1, /* vec_stmt_cost. */
1621 1, /* vec_to_scalar_cost. */
1622 1, /* scalar_to_vec_cost. */
1623 1, /* vec_align_load_cost. */
1624 2, /* vec_unalign_load_cost. */
1625 1, /* vec_store_cost. */
1626 3, /* cond_taken_branch_cost. */
1627 1, /* cond_not_taken_branch_cost. */
1630 static stringop_algs atom_memcpy[2] = {
1631 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1632 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1633 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1634 static stringop_algs atom_memset[2] = {
1635 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1636 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1637 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1638 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1640 struct processor_costs atom_cost = {
1641 COSTS_N_INSNS (1), /* cost of an add instruction */
1642 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1643 COSTS_N_INSNS (1), /* variable shift costs */
1644 COSTS_N_INSNS (1), /* constant shift costs */
1645 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1646 COSTS_N_INSNS (4), /* HI */
1647 COSTS_N_INSNS (3), /* SI */
1648 COSTS_N_INSNS (4), /* DI */
1649 COSTS_N_INSNS (2)}, /* other */
1650 0, /* cost of multiply per each bit set */
1651 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1652 COSTS_N_INSNS (26), /* HI */
1653 COSTS_N_INSNS (42), /* SI */
1654 COSTS_N_INSNS (74), /* DI */
1655 COSTS_N_INSNS (74)}, /* other */
1656 COSTS_N_INSNS (1), /* cost of movsx */
1657 COSTS_N_INSNS (1), /* cost of movzx */
1658 8, /* "large" insn */
1659 17, /* MOVE_RATIO */
1660 4, /* cost for loading QImode using movzbl */
1661 {4, 4, 4}, /* cost of loading integer registers
1662 in QImode, HImode and SImode.
1663 Relative to reg-reg move (2). */
1664 {4, 4, 4}, /* cost of storing integer registers */
1665 4, /* cost of reg,reg fld/fst */
1666 {12, 12, 12}, /* cost of loading fp registers
1667 in SFmode, DFmode and XFmode */
1668 {6, 6, 8}, /* cost of storing fp registers
1669 in SFmode, DFmode and XFmode */
1670 2, /* cost of moving MMX register */
1671 {8, 8}, /* cost of loading MMX registers
1672 in SImode and DImode */
1673 {8, 8}, /* cost of storing MMX registers
1674 in SImode and DImode */
1675 2, /* cost of moving SSE register */
1676 {8, 8, 8}, /* cost of loading SSE registers
1677 in SImode, DImode and TImode */
1678 {8, 8, 8}, /* cost of storing SSE registers
1679 in SImode, DImode and TImode */
1680 5, /* MMX or SSE register to integer */
1681 32, /* size of l1 cache. */
1682 256, /* size of l2 cache. */
1683 64, /* size of prefetch block */
1684 6, /* number of parallel prefetches */
1685 3, /* Branch cost */
1686 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1687 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1688 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1689 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1690 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1691 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1694 1, /* scalar_stmt_cost. */
1695 1, /* scalar load_cost. */
1696 1, /* scalar_store_cost. */
1697 1, /* vec_stmt_cost. */
1698 1, /* vec_to_scalar_cost. */
1699 1, /* scalar_to_vec_cost. */
1700 1, /* vec_align_load_cost. */
1701 2, /* vec_unalign_load_cost. */
1702 1, /* vec_store_cost. */
1703 3, /* cond_taken_branch_cost. */
1704 1, /* cond_not_taken_branch_cost. */
/* memcpy expansion strategy for Silvermont.  Each {max, alg, noalign}
   entry appears to select stringop algorithm ALG for block sizes up to
   MAX bytes (-1 = no upper bound), tried in order.
   NOTE(review): presumably element [0] is the 32-bit table and [1] the
   64-bit table, as for the other *_memcpy arrays -- confirm against the
   consumers of processor_costs::memcpy.  */
1707 static stringop_algs slm_memcpy[2] = {
1708 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1709 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1710 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* memset expansion strategy for Silvermont; same {max, alg, noalign}
   entry format as the memcpy tables above.  */
1711 static stringop_algs slm_memset[2] = {
1712 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1713 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1714 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1715 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1717 struct processor_costs slm_cost = {
1718 COSTS_N_INSNS (1), /* cost of an add instruction */
1719 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1720 COSTS_N_INSNS (1), /* variable shift costs */
1721 COSTS_N_INSNS (1), /* constant shift costs */
1722 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1723 COSTS_N_INSNS (3), /* HI */
1724 COSTS_N_INSNS (3), /* SI */
1725 COSTS_N_INSNS (4), /* DI */
1726 COSTS_N_INSNS (2)}, /* other */
1727 0, /* cost of multiply per each bit set */
1728 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1729 COSTS_N_INSNS (26), /* HI */
1730 COSTS_N_INSNS (42), /* SI */
1731 COSTS_N_INSNS (74), /* DI */
1732 COSTS_N_INSNS (74)}, /* other */
1733 COSTS_N_INSNS (1), /* cost of movsx */
1734 COSTS_N_INSNS (1), /* cost of movzx */
1735 8, /* "large" insn */
1736 17, /* MOVE_RATIO */
1737 4, /* cost for loading QImode using movzbl */
1738 {4, 4, 4}, /* cost of loading integer registers
1739 in QImode, HImode and SImode.
1740 Relative to reg-reg move (2). */
1741 {4, 4, 4}, /* cost of storing integer registers */
1742 4, /* cost of reg,reg fld/fst */
1743 {12, 12, 12}, /* cost of loading fp registers
1744 in SFmode, DFmode and XFmode */
1745 {6, 6, 8}, /* cost of storing fp registers
1746 in SFmode, DFmode and XFmode */
1747 2, /* cost of moving MMX register */
1748 {8, 8}, /* cost of loading MMX registers
1749 in SImode and DImode */
1750 {8, 8}, /* cost of storing MMX registers
1751 in SImode and DImode */
1752 2, /* cost of moving SSE register */
1753 {8, 8, 8}, /* cost of loading SSE registers
1754 in SImode, DImode and TImode */
1755 {8, 8, 8}, /* cost of storing SSE registers
1756 in SImode, DImode and TImode */
1757 5, /* MMX or SSE register to integer */
1758 32, /* size of l1 cache. */
1759 256, /* size of l2 cache. */
1760 64, /* size of prefetch block */
1761 6, /* number of parallel prefetches */
1762 3, /* Branch cost */
1763 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1764 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1765 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1766 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1767 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1768 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1771 1, /* scalar_stmt_cost. */
1772 1, /* scalar load_cost. */
1773 1, /* scalar_store_cost. */
1774 1, /* vec_stmt_cost. */
1775 4, /* vec_to_scalar_cost. */
1776 1, /* scalar_to_vec_cost. */
1777 1, /* vec_align_load_cost. */
1778 2, /* vec_unalign_load_cost. */
1779 1, /* vec_store_cost. */
1780 3, /* cond_taken_branch_cost. */
1781 1, /* cond_not_taken_branch_cost. */
/* memcpy expansion strategy for the generic "intel" tuning.  Identical
   to slm_memcpy above; {max, alg, noalign} entries tried in order,
   -1 meaning no upper size bound.  */
1784 static stringop_algs intel_memcpy[2] = {
1785 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1786 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1787 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* memset expansion strategy for the generic "intel" tuning; identical
   to slm_memset above.  */
1788 static stringop_algs intel_memset[2] = {
1789 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1790 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1791 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1792 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1794 struct processor_costs intel_cost = {
1795 COSTS_N_INSNS (1), /* cost of an add instruction */
1796 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1797 COSTS_N_INSNS (1), /* variable shift costs */
1798 COSTS_N_INSNS (1), /* constant shift costs */
1799 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1800 COSTS_N_INSNS (3), /* HI */
1801 COSTS_N_INSNS (3), /* SI */
1802 COSTS_N_INSNS (4), /* DI */
1803 COSTS_N_INSNS (2)}, /* other */
1804 0, /* cost of multiply per each bit set */
1805 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1806 COSTS_N_INSNS (26), /* HI */
1807 COSTS_N_INSNS (42), /* SI */
1808 COSTS_N_INSNS (74), /* DI */
1809 COSTS_N_INSNS (74)}, /* other */
1810 COSTS_N_INSNS (1), /* cost of movsx */
1811 COSTS_N_INSNS (1), /* cost of movzx */
1812 8, /* "large" insn */
1813 17, /* MOVE_RATIO */
1814 4, /* cost for loading QImode using movzbl */
1815 {4, 4, 4}, /* cost of loading integer registers
1816 in QImode, HImode and SImode.
1817 Relative to reg-reg move (2). */
1818 {4, 4, 4}, /* cost of storing integer registers */
1819 4, /* cost of reg,reg fld/fst */
1820 {12, 12, 12}, /* cost of loading fp registers
1821 in SFmode, DFmode and XFmode */
1822 {6, 6, 8}, /* cost of storing fp registers
1823 in SFmode, DFmode and XFmode */
1824 2, /* cost of moving MMX register */
1825 {8, 8}, /* cost of loading MMX registers
1826 in SImode and DImode */
1827 {8, 8}, /* cost of storing MMX registers
1828 in SImode and DImode */
1829 2, /* cost of moving SSE register */
1830 {8, 8, 8}, /* cost of loading SSE registers
1831 in SImode, DImode and TImode */
1832 {8, 8, 8}, /* cost of storing SSE registers
1833 in SImode, DImode and TImode */
1834 5, /* MMX or SSE register to integer */
1835 32, /* size of l1 cache. */
1836 256, /* size of l2 cache. */
1837 64, /* size of prefetch block */
1838 6, /* number of parallel prefetches */
1839 3, /* Branch cost */
1840 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1841 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1842 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1843 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1844 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1845 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1848 1, /* scalar_stmt_cost. */
1849 1, /* scalar load_cost. */
1850 1, /* scalar_store_cost. */
1851 1, /* vec_stmt_cost. */
1852 4, /* vec_to_scalar_cost. */
1853 1, /* scalar_to_vec_cost. */
1854 1, /* vec_align_load_cost. */
1855 2, /* vec_unalign_load_cost. */
1856 1, /* vec_store_cost. */
1857 3, /* cond_taken_branch_cost. */
1858 1, /* cond_not_taken_branch_cost. */
1861 /* Generic should produce code tuned for Core-i7 (and newer chips)
1862 and btver1 (and newer chips). */
/* memcpy expansion strategy for -mtune=generic: small copies by an
   inline loop, medium copies via rep prefix, everything else through
   the libcall.  */
1864 static stringop_algs generic_memcpy[2] = {
1865 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1866 {-1, libcall, false}}},
1867 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1868 {-1, libcall, false}}}};
/* memset expansion strategy for -mtune=generic; mirrors generic_memcpy
   (same size thresholds and algorithms).  */
1869 static stringop_algs generic_memset[2] = {
1870 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1871 {-1, libcall, false}}},
1872 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1873 {-1, libcall, false}}}};
1875 struct processor_costs generic_cost = {
1876 COSTS_N_INSNS (1), /* cost of an add instruction */
1877 /* On all chips taken into consideration lea is 2 cycles and more. With
1878 this cost however our current implementation of synth_mult results in
1879 use of unnecessary temporary registers causing regression on several
1880 SPECfp benchmarks. */
1881 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1882 COSTS_N_INSNS (1), /* variable shift costs */
1883 COSTS_N_INSNS (1), /* constant shift costs */
1884 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1885 COSTS_N_INSNS (4), /* HI */
1886 COSTS_N_INSNS (3), /* SI */
1887 COSTS_N_INSNS (4), /* DI */
1888 COSTS_N_INSNS (2)}, /* other */
1889 0, /* cost of multiply per each bit set */
1890 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1891 COSTS_N_INSNS (26), /* HI */
1892 COSTS_N_INSNS (42), /* SI */
1893 COSTS_N_INSNS (74), /* DI */
1894 COSTS_N_INSNS (74)}, /* other */
1895 COSTS_N_INSNS (1), /* cost of movsx */
1896 COSTS_N_INSNS (1), /* cost of movzx */
1897 8, /* "large" insn */
1898 17, /* MOVE_RATIO */
1899 4, /* cost for loading QImode using movzbl */
1900 {4, 4, 4}, /* cost of loading integer registers
1901 in QImode, HImode and SImode.
1902 Relative to reg-reg move (2). */
1903 {4, 4, 4}, /* cost of storing integer registers */
1904 4, /* cost of reg,reg fld/fst */
1905 {12, 12, 12}, /* cost of loading fp registers
1906 in SFmode, DFmode and XFmode */
1907 {6, 6, 8}, /* cost of storing fp registers
1908 in SFmode, DFmode and XFmode */
1909 2, /* cost of moving MMX register */
1910 {8, 8}, /* cost of loading MMX registers
1911 in SImode and DImode */
1912 {8, 8}, /* cost of storing MMX registers
1913 in SImode and DImode */
1914 2, /* cost of moving SSE register */
1915 {8, 8, 8}, /* cost of loading SSE registers
1916 in SImode, DImode and TImode */
1917 {8, 8, 8}, /* cost of storing SSE registers
1918 in SImode, DImode and TImode */
1919 5, /* MMX or SSE register to integer */
1920 32, /* size of l1 cache. */
1921 512, /* size of l2 cache. */
1922 64, /* size of prefetch block */
1923 6, /* number of parallel prefetches */
1924 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
1925 value is increased to perhaps more appropriate value of 5. */
1926 3, /* Branch cost */
1927 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1928 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1929 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1930 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1931 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1932 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1935 1, /* scalar_stmt_cost. */
1936 1, /* scalar load_cost. */
1937 1, /* scalar_store_cost. */
1938 1, /* vec_stmt_cost. */
1939 1, /* vec_to_scalar_cost. */
1940 1, /* scalar_to_vec_cost. */
1941 1, /* vec_align_load_cost. */
1942 2, /* vec_unalign_load_cost. */
1943 1, /* vec_store_cost. */
1944 3, /* cond_taken_branch_cost. */
1945 1, /* cond_not_taken_branch_cost. */
1948 /* core_cost should produce code tuned for Core family of CPUs. */
/* memcpy expansion strategy for the Core family.  Unlike the tables
   above, several entries set the third (noalign) field to true.  */
1949 static stringop_algs core_memcpy[2] = {
1950 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1951 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1952 {-1, libcall, false}}}};
/* memset expansion strategy for the Core family; same entry format as
   core_memcpy above.  */
1953 static stringop_algs core_memset[2] = {
1954 {libcall, {{6, loop_1_byte, true},
1956 {8192, rep_prefix_4_byte, true},
1957 {-1, libcall, false}}},
1958 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1959 {-1, libcall, false}}}};
1962 struct processor_costs core_cost = {
1963 COSTS_N_INSNS (1), /* cost of an add instruction */
1964 /* On all chips taken into consideration lea is 2 cycles and more. With
1965 this cost however our current implementation of synth_mult results in
1966 use of unnecessary temporary registers causing regression on several
1967 SPECfp benchmarks. */
1968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1969 COSTS_N_INSNS (1), /* variable shift costs */
1970 COSTS_N_INSNS (1), /* constant shift costs */
1971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1972 COSTS_N_INSNS (4), /* HI */
1973 COSTS_N_INSNS (3), /* SI */
1974 COSTS_N_INSNS (4), /* DI */
1975 COSTS_N_INSNS (2)}, /* other */
1976 0, /* cost of multiply per each bit set */
1977 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1978 COSTS_N_INSNS (26), /* HI */
1979 COSTS_N_INSNS (42), /* SI */
1980 COSTS_N_INSNS (74), /* DI */
1981 COSTS_N_INSNS (74)}, /* other */
1982 COSTS_N_INSNS (1), /* cost of movsx */
1983 COSTS_N_INSNS (1), /* cost of movzx */
1984 8, /* "large" insn */
1985 17, /* MOVE_RATIO */
1986 4, /* cost for loading QImode using movzbl */
1987 {4, 4, 4}, /* cost of loading integer registers
1988 in QImode, HImode and SImode.
1989 Relative to reg-reg move (2). */
1990 {4, 4, 4}, /* cost of storing integer registers */
1991 4, /* cost of reg,reg fld/fst */
1992 {12, 12, 12}, /* cost of loading fp registers
1993 in SFmode, DFmode and XFmode */
1994 {6, 6, 8}, /* cost of storing fp registers
1995 in SFmode, DFmode and XFmode */
1996 2, /* cost of moving MMX register */
1997 {8, 8}, /* cost of loading MMX registers
1998 in SImode and DImode */
1999 {8, 8}, /* cost of storing MMX registers
2000 in SImode and DImode */
2001 2, /* cost of moving SSE register */
2002 {8, 8, 8}, /* cost of loading SSE registers
2003 in SImode, DImode and TImode */
2004 {8, 8, 8}, /* cost of storing SSE registers
2005 in SImode, DImode and TImode */
2006 5, /* MMX or SSE register to integer */
2007 64, /* size of l1 cache. */
2008 512, /* size of l2 cache. */
2009 64, /* size of prefetch block */
2010 6, /* number of parallel prefetches */
2011 /* FIXME perhaps more appropriate value is 5. */
2012 3, /* Branch cost */
2013 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2014 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2015 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2016 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2017 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2018 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2021 1, /* scalar_stmt_cost. */
2022 1, /* scalar load_cost. */
2023 1, /* scalar_store_cost. */
2024 1, /* vec_stmt_cost. */
2025 1, /* vec_to_scalar_cost. */
2026 1, /* scalar_to_vec_cost. */
2027 1, /* vec_align_load_cost. */
2028 2, /* vec_unalign_load_cost. */
2029 1, /* vec_store_cost. */
2030 3, /* cond_taken_branch_cost. */
2031 1, /* cond_not_taken_branch_cost. */
/* Active cost tables.  Both start out pointing at pentium_cost;
   NOTE(review): presumably they are redirected to the selected
   processor's table during option processing -- the code that does
   that is not in this chunk.  */
2035 /* Set by -mtune. */
2036 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2038 /* Set by -mtune or -Os. */
2039 const struct processor_costs *ix86_cost = &pentium_cost;
2041 /* Processor feature/optimization bitmasks. */
/* Each m_* macro is a one-bit mask for the corresponding
   enum processor_type value; the grouped masks (m_P4_NOCONA,
   m_CORE_ALL, ...) OR several of them together for use in the
   tuning selector tables.  */
2042 #define m_386 (1<<PROCESSOR_I386)
2043 #define m_486 (1<<PROCESSOR_I486)
2044 #define m_PENT (1<<PROCESSOR_PENTIUM)
2045 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2046 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2047 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2048 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2049 #define m_CORE2 (1<<PROCESSOR_CORE2)
2050 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2051 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2052 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2053 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2054 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2055 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2056 #define m_KNL (1<<PROCESSOR_KNL)
2057 #define m_INTEL (1<<PROCESSOR_INTEL)
/* AMD processors (grouped below into m_AMD_MULTIPLE).  */
2059 #define m_GEODE (1<<PROCESSOR_GEODE)
2060 #define m_K6 (1<<PROCESSOR_K6)
2061 #define m_K6_GEODE (m_K6 | m_GEODE)
2062 #define m_K8 (1<<PROCESSOR_K8)
2063 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2064 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2065 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2066 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2067 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2068 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2069 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2070 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2071 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2072 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2073 #define m_BTVER (m_BTVER1 | m_BTVER2)
2074 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
/* The blended "generic" tuning model (-mtune=generic).  */
2076 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2078 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2080 #define DEF_TUNE(tune, name, selector) name,
2081 #include "x86-tune.def"
2085 /* Feature tests against the various tunings. */
2086 unsigned char ix86_tune_features[X86_TUNE_LAST];
2088 /* Feature tests against the various tunings used to create ix86_tune_features
2089 based on the processor mask. */
2090 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2092 #define DEF_TUNE(tune, name, selector) selector,
2093 #include "x86-tune.def"
2097 /* Feature tests against the various architecture variations. */
2098 unsigned char ix86_arch_features[X86_ARCH_LAST];
2100 /* Feature tests against the various architecture variations, used to create
2101 ix86_arch_features based on the processor mask. */
2102 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2103 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2104 ~(m_386 | m_486 | m_PENT | m_K6),
2106 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2109 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2112 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2115 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2119 /* In case the average insn count for single function invocation is
2120 lower than this constant, emit fast (but longer) prologue and epilogue.  */
2122 #define FAST_PROLOGUE_INSN_COUNT 20
2124 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2125 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2126 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2127 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2129 /* Array of the smallest class containing reg number REGNO, indexed by
2130 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2132 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2134 /* ax, dx, cx, bx */
2135 AREG, DREG, CREG, BREG,
2136 /* si, di, bp, sp */
2137 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2139 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2140 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2143 /* flags, fpsr, fpcr, frame */
2144 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2146 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2149 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2152 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2153 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2154 /* SSE REX registers */
2155 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2157 /* AVX-512 SSE registers */
2158 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2159 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2160 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2161 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2162 /* Mask registers. */
2163 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2164 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2165 /* MPX bound registers */
2166 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2169 /* The "default" register map used in 32bit mode. */
2171 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2173 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2174 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2175 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2176 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2177 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2178 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2179 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2180 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2181 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2182 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2183 101, 102, 103, 104, /* bound registers */
2186 /* The "default" register map used in 64bit mode. */
2188 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2190 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2191 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2192 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2193 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2194 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2195 8,9,10,11,12,13,14,15, /* extended integer registers */
2196 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2197 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2198 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2199 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2200 126, 127, 128, 129, /* bound registers */
2203 /* Define the register numbers to be used in Dwarf debugging information.
2204 The SVR4 reference port C compiler uses the following register numbers
2205 in its Dwarf output code:
2206 0 for %eax (gcc regno = 0)
2207 1 for %ecx (gcc regno = 2)
2208 2 for %edx (gcc regno = 1)
2209 3 for %ebx (gcc regno = 3)
2210 4 for %esp (gcc regno = 7)
2211 5 for %ebp (gcc regno = 6)
2212 6 for %esi (gcc regno = 4)
2213 7 for %edi (gcc regno = 5)
2214 The following three DWARF register numbers are never generated by
2215 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2216 believes these numbers have these meanings.
2217 8 for %eip (no gcc equivalent)
2218 9 for %eflags (gcc regno = 17)
2219 10 for %trapno (no gcc equivalent)
2220 It is not at all clear how we should number the FP stack registers
2221 for the x86 architecture. If the version of SDB on x86/svr4 were
2222 a bit less brain dead with respect to floating-point then we would
2223 have a precedent to follow with respect to DWARF register numbers
2224 for x86 FP registers, but the SDB on x86/svr4 is so completely
2225 broken with respect to FP registers that it is hardly worth thinking
2226 of it as something to strive for compatibility with.
2227 The version of x86/svr4 SDB I have at the moment does (partially)
2228 seem to believe that DWARF register number 11 is associated with
2229 the x86 register %st(0), but that's about all. Higher DWARF
2230 register numbers don't seem to be associated with anything in
2231 particular, and even for DWARF regno 11, SDB only seems to under-
2232 stand that it should say that a variable lives in %st(0) (when
2233 asked via an `=' command) if we said it was in DWARF regno 11,
2234 but SDB still prints garbage when asked for the value of the
2235 variable in question (via a `/' command).
2236 (Also note that the labels SDB prints for various FP stack regs
2237 when doing an `x' command are all wrong.)
2238 Note that these problems generally don't affect the native SVR4
2239 C compiler because it doesn't allow the use of -O with -g and
2240 because when it is *not* optimizing, it allocates a memory
2241 location for each floating-point variable, and the memory
2242 location is what gets described in the DWARF AT_location
2243 attribute for the variable in question.
2244 Regardless of the severe mental illness of the x86/svr4 SDB, we
2245 do something sensible here and we use the following DWARF
2246 register numbers. Note that these are all stack-top-relative
2248 11 for %st(0) (gcc regno = 8)
2249 12 for %st(1) (gcc regno = 9)
2250 13 for %st(2) (gcc regno = 10)
2251 14 for %st(3) (gcc regno = 11)
2252 15 for %st(4) (gcc regno = 12)
2253 16 for %st(5) (gcc regno = 13)
2254 17 for %st(6) (gcc regno = 14)
2255 18 for %st(7) (gcc regno = 15)  */
2257 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2259 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2260 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2261 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2262 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2263 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2264 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2265 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2266 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2267 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2268 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2269 101, 102, 103, 104, /* bound registers */
2272 /* Define parameter passing and return registers. */
2274 static int const x86_64_int_parameter_registers[6] =
2276 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2279 static int const x86_64_ms_abi_int_parameter_registers[4] =
2281 CX_REG, DX_REG, R8_REG, R9_REG
2284 static int const x86_64_int_return_registers[4] =
2286 AX_REG, DX_REG, DI_REG, SI_REG
2289 /* Additional registers that are clobbered by SYSV calls. */
2291 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2295 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2296 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2299 /* Define the structure for the machine field in struct function. */
2301 struct GTY(()) stack_local_entry {
2302 unsigned short mode;
2305 struct stack_local_entry *next;
2308 /* Structure describing stack frame layout.
2309 Stack grows downward:
2315 saved static chain if ix86_static_chain_on_stack
2317 saved frame pointer if frame_pointer_needed
2318 <- HARD_FRAME_POINTER
2324 <- sse_regs_save_offset
2327 [va_arg registers] |
2331 [padding2] | = to_allocate  */
2340 int outgoing_arguments_size;
2342 /* The offsets relative to ARG_POINTER. */
2343 HOST_WIDE_INT frame_pointer_offset;
2344 HOST_WIDE_INT hard_frame_pointer_offset;
2345 HOST_WIDE_INT stack_pointer_offset;
2346 HOST_WIDE_INT hfp_save_offset;
2347 HOST_WIDE_INT reg_save_offset;
2348 HOST_WIDE_INT sse_reg_save_offset;
2350 /* When save_regs_using_mov is set, emit prologue using
2351 move instead of push instructions. */
2352 bool save_regs_using_mov;
2355 /* Which cpu are we scheduling for. */
2356 enum attr_cpu ix86_schedule;
2358 /* Which cpu are we optimizing for. */
2359 enum processor_type ix86_tune;
2361 /* Which instruction set architecture to use. */
2362 enum processor_type ix86_arch;
2364 /* True if processor has SSE prefetch instruction. */
2365 unsigned char x86_prefetch_sse;
2367 /* -mstackrealign option */
2368 static const char ix86_force_align_arg_pointer_string[]
2369 = "force_align_arg_pointer";
/* Indirect generator functions for common instruction patterns, so
   emitting code elsewhere does not need to re-select a pattern each
   time.  NOTE(review): presumably these are pointed at the SImode or
   DImode gen_* pattern once per compilation depending on word size --
   the code that assigns them is not in this chunk; confirm there.  */
2371 static rtx (*ix86_gen_leave) (void);
2372 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2373 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2374 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2375 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2376 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2377 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2378 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2379 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2380 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2381 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2382 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2384 /* Preferred alignment for stack boundary in bits. */
2385 unsigned int ix86_preferred_stack_boundary;
2387 /* Alignment for incoming stack boundary in bits, as specified by
     the user (e.g. via -mincoming-stack-boundary).  */
2389 static unsigned int ix86_user_incoming_stack_boundary;
2391 /* Default alignment for incoming stack boundary in bits. */
2392 static unsigned int ix86_default_incoming_stack_boundary;
2394 /* Alignment for incoming stack boundary in bits. */
2395 unsigned int ix86_incoming_stack_boundary;
2397 /* Calling abi specific va_list type nodes. */
2398 static GTY(()) tree sysv_va_list_type_node;
2399 static GTY(()) tree ms_va_list_type_node;
2401 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2402 char internal_label_prefix[16];
/* Cached strlen of internal_label_prefix.  */
2403 int internal_label_prefix_len;
2405 /* Fence to use after loop using movnt. */
2408 /* Register class used for passing given 64bit part of the argument.
2409 These represent classes as documented by the PS ABI, with the exception
2410 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2411 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2413 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2414 whenever possible (upper half does contain padding). */
2415 enum x86_64_reg_class
2418 X86_64_INTEGER_CLASS,
2419 X86_64_INTEGERSI_CLASS,
2426 X86_64_COMPLEX_X87_CLASS,
2430 #define MAX_CLASSES 8
2432 /* Table of constants used by fldpi, fldln2, etc.... */
2433 static REAL_VALUE_TYPE ext_80387_constants_table [5];
/* True once the table above has been filled in (lazy initialization;
   NOTE(review): the initializer itself is not in this chunk).  */
2434 static bool ext_80387_constants_init = 0;
2437 static struct machine_function * ix86_init_machine_status (void);
2438 static rtx ix86_function_value (const_tree, const_tree, bool);
2439 static bool ix86_function_value_regno_p (const unsigned int);
2440 static unsigned int ix86_function_arg_boundary (machine_mode,
2442 static rtx ix86_static_chain (const_tree, bool);
2443 static int ix86_function_regparm (const_tree, const_tree);
2444 static void ix86_compute_frame_layout (struct ix86_frame *);
2445 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2447 static void ix86_add_new_builtins (HOST_WIDE_INT);
2448 static tree ix86_canonical_va_list_type (tree);
2449 static void predict_jump (int);
2450 static unsigned int split_stack_prologue_scratch_regno (void);
2451 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2453 enum ix86_function_specific_strings
2455 IX86_FUNCTION_SPECIFIC_ARCH,
2456 IX86_FUNCTION_SPECIFIC_TUNE,
2457 IX86_FUNCTION_SPECIFIC_MAX
/* Forward declarations for the target-attribute / option-save-restore
   machinery defined later in this file.  */
2460 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2461 const char *, enum fpmath_unit, bool);
2462 static void ix86_function_specific_save (struct cl_target_option *,
2463 struct gcc_options *opts);
2464 static void ix86_function_specific_restore (struct gcc_options *opts,
2465 struct cl_target_option *);
2466 static void ix86_function_specific_print (FILE *, int,
2467 struct cl_target_option *);
2468 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2469 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2470 struct gcc_options *,
2471 struct gcc_options *,
2472 struct gcc_options *);
2473 static bool ix86_can_inline_p (tree, tree);
2474 static void ix86_set_current_function (tree);
2475 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2477 static enum calling_abi ix86_function_abi (const_tree);
2480 #ifndef SUBTARGET32_DEFAULT_CPU
2481 #define SUBTARGET32_DEFAULT_CPU "i386"
2484 /* Whether -mtune= or -march= were specified */
2485 static int ix86_tune_defaulted;
2486 static int ix86_arch_specified;
2488 /* Vectorization library interface and handlers. */
/* NOTE(review): ix86_veclib_handler is presumably set to one of the
   two handlers below when -mveclibabi=svml/acml is given -- confirm
   in option processing.  */
2489 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2491 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2492 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2494 /* Processor target table, indexed by processor number */
2497 const char *const name; /* processor name */
2498 const struct processor_costs *cost; /* Processor costs */
2499 const int align_loop; /* Default alignments. */
2500 const int align_loop_max_skip;
2501 const int align_jump;
2502 const int align_jump_max_skip;
2503 const int align_func;
2506 /* This table must be in sync with enum processor_type in i386.h. */
2507 static const struct ptt processor_target_table[PROCESSOR_max] =
2509 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2510 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2511 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2512 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2513 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2514 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2515 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2516 {"core2", &core_cost, 16, 10, 16, 10, 16},
2517 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2518 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2519 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2520 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2521 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2522 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2523 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2524 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2525 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2526 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2527 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2528 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2529 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2530 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2531 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2532 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2533 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2534 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
/* Worker for the vzeroupper insertion pass: re-runs the generic mode
   switching pass with only the AVX_U128 entity enabled, so that
   vzeroupper instructions are placed after reload.
   NOTE(review): the return-type line, braces, and the trailing return
   statement are elided in this extract.  */
2538 rest_of_handle_insert_vzeroupper (void)
2542 /* vzeroupper instructions are inserted immediately after reload to
2543 account for possible spills from 256bit registers. The pass
2544 reuses mode switching infrastructure by re-running mode insertion
2545 pass, so disable entities that have already been processed. */
2546 for (i = 0; i < MAX_386_ENTITIES; i++)
2547 ix86_optimize_mode_switching[i] = 0;
/* Only the AVX upper-128 state entity participates in this re-run.  */
2549 ix86_optimize_mode_switching[AVX_U128] = 1;
2551 /* Call optimize_mode_switching. */
2552 g->get_passes ()->execute_pass_mode_switching ();
/* Pass descriptor for the vzeroupper insertion RTL pass; consumed by
   the pass manager via pass_insert_vzeroupper below.
   NOTE(review): the aggregate's opening "{" and closing "};" lines are
   elided in this extract.  */
2558 const pass_data pass_data_insert_vzeroupper =
2560 RTL_PASS, /* type */
2561 "vzeroupper", /* name */
2562 OPTGROUP_NONE, /* optinfo_flags */
2563 TV_NONE, /* tv_id */
2564 0, /* properties_required */
2565 0, /* properties_provided */
2566 0, /* properties_destroyed */
2567 0, /* todo_flags_start */
2568 TODO_df_finish, /* todo_flags_finish */
/* RTL pass wrapper: gates on AVX (but not AVX-512F) with -mvzeroupper
   and -fexpensive-optimizations, then delegates to
   rest_of_handle_insert_vzeroupper.
   NOTE(review): the class's "public:" label, brace lines, and the tail
   of the gate condition are elided in this extract.  */
2571 class pass_insert_vzeroupper : public rtl_opt_pass
2574 pass_insert_vzeroupper(gcc::context *ctxt)
2575 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2578 /* opt_pass methods: */
2579 virtual bool gate (function *)
2581 return TARGET_AVX && !TARGET_AVX512F
2582 && TARGET_VZEROUPPER && flag_expensive_optimizations
2586 virtual unsigned int execute (function *)
2588 return rest_of_handle_insert_vzeroupper ();
2591 }; // class pass_insert_vzeroupper
/* Factory used by the pass manager to instantiate the vzeroupper pass.
   NOTE(review): the "rtl_opt_pass *" return-type line and braces are
   elided in this extract.  Ownership of the new pass transfers to the
   pass manager.  */
2596 make_pass_insert_vzeroupper (gcc::context *ctxt)
2598 return new pass_insert_vzeroupper (ctxt);
2601 /* Return true if a red-zone is in use. */
/* The red zone exists for the SysV x86-64 ABI but not for the
   Microsoft 64-bit ABI, hence the !TARGET_64BIT_MS_ABI test.  */
2604 ix86_using_red_zone (void)
2606 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2609 /* Return a string that documents the current -m options. The caller is
2610 responsible for freeing the string. */
/* NOTE(review): this extract elides the return-type line, several local
   declarations (num, i, j, len, sep_len, ret, ptr, line_len, abi,
   isa_other[]), and various brace/control lines.  The visible logic:
   build an opts[][2] table of {option, argument} pairs from the ISA and
   flag masks, then size and assemble a single malloc'd string.  */
2613 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2614 const char *tune, enum fpmath_unit fpmath,
2617 struct ix86_target_opts
2619 const char *option; /* option string */
2620 HOST_WIDE_INT mask; /* isa mask options */
2623 /* This table is ordered so that options like -msse4.2 that imply
2624 preceding options while match those first. */
2625 static struct ix86_target_opts isa_opts[] =
2627 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2628 { "-mfma", OPTION_MASK_ISA_FMA },
2629 { "-mxop", OPTION_MASK_ISA_XOP },
2630 { "-mlwp", OPTION_MASK_ISA_LWP },
2631 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2632 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2633 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2634 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2635 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2636 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2637 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2638 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2639 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2640 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2641 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2642 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2643 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2644 { "-msse3", OPTION_MASK_ISA_SSE3 },
2645 { "-msse2", OPTION_MASK_ISA_SSE2 },
2646 { "-msse", OPTION_MASK_ISA_SSE },
2647 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2648 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2649 { "-mmmx", OPTION_MASK_ISA_MMX },
2650 { "-mabm", OPTION_MASK_ISA_ABM },
2651 { "-mbmi", OPTION_MASK_ISA_BMI },
2652 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2653 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2654 { "-mhle", OPTION_MASK_ISA_HLE },
2655 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2656 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2657 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2658 { "-madx", OPTION_MASK_ISA_ADX },
2659 { "-mtbm", OPTION_MASK_ISA_TBM },
2660 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2661 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2662 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2663 { "-maes", OPTION_MASK_ISA_AES },
2664 { "-msha", OPTION_MASK_ISA_SHA },
2665 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2666 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2667 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2668 { "-mf16c", OPTION_MASK_ISA_F16C },
2669 { "-mrtm", OPTION_MASK_ISA_RTM },
2670 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2671 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2672 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2673 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2674 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2675 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2676 { "-mmpx", OPTION_MASK_ISA_MPX },
2677 { "-mclwb", OPTION_MASK_ISA_CLWB },
2678 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
/* Non-ISA target_flags options, reported after the ISA options.  */
2682 static struct ix86_target_opts flag_opts[] =
2684 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2685 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2686 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2687 { "-m80387", MASK_80387 },
2688 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2689 { "-malign-double", MASK_ALIGN_DOUBLE },
2690 { "-mcld", MASK_CLD },
2691 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2692 { "-mieee-fp", MASK_IEEE_FP },
2693 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2694 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2695 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2696 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2697 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2698 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2699 { "-mno-red-zone", MASK_NO_RED_ZONE },
2700 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2701 { "-mrecip", MASK_RECIP },
2702 { "-mrtd", MASK_RTD },
2703 { "-msseregparm", MASK_SSEREGPARM },
2704 { "-mstack-arg-probe", MASK_STACK_PROBE },
2705 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2706 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2707 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2708 { "-mvzeroupper", MASK_VZEROUPPER },
2709 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2710 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2711 { "-mprefer-avx128", MASK_PREFER_AVX128},
/* +6 reserves slots for -march, -mtune, ABI, -mfpmath and the two
   "other" catch-all entries.  */
2714 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2717 char target_other[40];
2727 memset (opts, '\0', sizeof (opts));
2729 /* Add -march= option. */
2732 opts[num][0] = "-march=";
2733 opts[num++][1] = arch;
2736 /* Add -mtune= option. */
2739 opts[num][0] = "-mtune=";
2740 opts[num++][1] = tune;
2743 /* Add -m32/-m64/-mx32. */
2744 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2746 if ((isa & OPTION_MASK_ABI_64) != 0)
/* The ABI bits have been reported; strip them so they do not also show
   up in the "other isa" leftovers.  */
2750 isa &= ~ (OPTION_MASK_ISA_64BIT
2751 | OPTION_MASK_ABI_64
2752 | OPTION_MASK_ABI_X32);
2756 opts[num++][0] = abi;
2758 /* Pick out the options in isa options. */
2759 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2761 if ((isa & isa_opts[i].mask) != 0)
2763 opts[num++][0] = isa_opts[i].option;
2764 isa &= ~ isa_opts[i].mask;
/* Any ISA bits not named in the table are dumped as a hex catch-all.  */
2768 if (isa && add_nl_p)
2770 opts[num++][0] = isa_other;
2771 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2775 /* Add flag options. */
2776 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2778 if ((flags & flag_opts[i].mask) != 0)
2780 opts[num++][0] = flag_opts[i].option;
2781 flags &= ~ flag_opts[i].mask;
2785 if (flags && add_nl_p)
2787 opts[num++][0] = target_other;
2788 sprintf (target_other, "(other flags: %#x)", flags);
2791 /* Add -fpmath= option. */
2794 opts[num][0] = "-mfpmath=";
2795 switch ((int) fpmath)
2798 opts[num++][1] = "387";
2802 opts[num++][1] = "sse";
2805 case FPMATH_387 | FPMATH_SSE:
2806 opts[num++][1] = "sse+387";
2818 gcc_assert (num < ARRAY_SIZE (opts));
2820 /* Size the string. */
/* Separator is " \\\n" (3 chars) when wrapping lines, else " ".  */
2822 sep_len = (add_nl_p) ? 3 : 1;
2823 for (i = 0; i < num; i++)
2826 for (j = 0; j < 2; j++)
2828 len += strlen (opts[i][j]);
2831 /* Build the string. */
2832 ret = ptr = (char *) xmalloc (len);
2835 for (i = 0; i < num; i++)
2839 for (j = 0; j < 2; j++)
2840 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
/* Wrap lines at roughly 70 columns when newlines were requested.  */
2847 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2855 for (j = 0; j < 2; j++)
2858 memcpy (ptr, opts[i][j], len2[j]);
2860 line_len += len2[j];
2865 gcc_assert (ret + len >= ptr);
2870 /* Return true, if profiling code should be emitted before
2871 prologue. Otherwise it returns false.
2872 Note: For x86 with "hotfix" it is sorried. */
/* True exactly when -mfentry is in effect: __fentry__ is called at the
   very start of the function, before the prologue.  */
2874 ix86_profile_before_prologue (void)
2876 return flag_fentry != 0;
2879 /* Function that is callable from the debugger to print the current
/* Prints the -m option string (from ix86_target_string) to stderr.
   NOTE(review): the elided lines presumably free the returned string
   and handle the NULL case via the fputs branch below — confirm against
   the full source.  */
2881 void ATTRIBUTE_UNUSED
2882 ix86_debug_options (void)
2884 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2885 ix86_arch_string, ix86_tune_string,
2890 fprintf (stderr, "%s\n\n", opts);
2894 fputs ("<no options>\n\n", stderr);
/* Names of the string-operation algorithms, generated from stringop.def
   so the array stays in sync with enum stringop_alg.  Used when parsing
   -mmemcpy-strategy= / -mmemset-strategy= below.  */
2899 static const char *stringop_alg_names[] = {
2901 #define DEF_ALG(alg, name) #name,
2902 #include "stringop.def"
2907 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2908 The string is of the following form (or comma separated list of it):
2910 strategy_alg:max_size:[align|noalign]
2912 where the full size range for the strategy is either [0, max_size] or
2913 [min_size, max_size], in which min_size is the max_size + 1 of the
2914 preceding range. The last size range must have max_size == -1.
2919 -mmemcpy-strategy=libcall:-1:noalign
2921 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2925 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2927 This is to tell the compiler to use the following strategy for memset
2928 1) when the expected size is between [1, 16], use rep_8byte strategy;
2929 2) when the size is between [17, 2048], use vector_loop;
2930 3) when the size is > 2048, use libcall. */
/* NOTE(review): the struct body (max/alg/noalign fields) and several
   declarations (alg_name[], align[], maxs, i, n, last_alg) are elided
   in this extract, as are the loop braces and error-path returns.  */
2932 struct stringop_size_range
2940 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2942 const struct stringop_algs *default_algs;
2943 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2944 char *curr_range_str, *next_range_str;
/* Pick the 32- or 64-bit cost table for the requested operation.  */
2948 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2950 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2952 curr_range_str = strategy_str;
/* Destructively split the comma-separated list in place.  */
2959 next_range_str = strchr (curr_range_str, ',');
2961 *next_range_str++ = '\0';
2963 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2964 alg_name, &maxs, align))
2966 error ("wrong arg %s to option %s", curr_range_str,
2967 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=")
2971 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2973 error ("size ranges of option %s should be increasing",
2974 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* Translate the algorithm name into its enum stringop_alg index.  */
2978 for (i = 0; i < last_alg; i++)
2979 if (!strcmp (alg_name, stringop_alg_names[i]))
2984 error ("wrong stringop strategy name %s specified for option %s",
2986 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2990 input_ranges[n].max = maxs;
2991 input_ranges[n].alg = (stringop_alg) i;
2992 if (!strcmp (align, "align"))
2993 input_ranges[n].noalign = false;
2994 else if (!strcmp (align, "noalign"))
2995 input_ranges[n].noalign = true;
2998 error ("unknown alignment %s specified for option %s",
2999 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3003 curr_range_str = next_range_str;
3005 while (curr_range_str);
3007 if (input_ranges[n - 1].max != -1)
3009 error ("the max value for the last size range should be -1"
3011 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3015 if (n > MAX_STRINGOP_ALGS)
3017 error ("too many size ranges specified in option %s",
3018 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3022 /* Now override the default algs array. */
/* The cost tables are declared const; const_cast is used deliberately
   here to install the user-specified strategy over the defaults.  */
3023 for (i = 0; i < n; i++)
3025 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3026 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3027 = input_ranges[i].alg;
3028 *const_cast<int *>(&default_algs->size[i].noalign)
3029 = input_ranges[i].noalign;
3034 /* parse -mtune-ctrl= option. When DUMP is true,
3035 print the features that are explicitly set. */
/* NOTE(review): the elided lines include the "do {" loop opener and a
   "bool clear" flag set by the '^' prefix handling below; the option
   string is a comma-separated list of feature names, each optionally
   prefixed with '^' to clear instead of set.  */
3038 parse_mtune_ctrl_str (bool dump)
3040 if (!ix86_tune_ctrl_string)
3043 char *next_feature_string = NULL;
3044 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3045 char *orig = curr_feature_string;
3051 next_feature_string = strchr (curr_feature_string, ',');
3052 if (next_feature_string)
3053 *next_feature_string++ = '\0';
3054 if (*curr_feature_string == '^')
3056 curr_feature_string++;
3059 for (i = 0; i < X86_TUNE_LAST; i++)
3061 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3063 ix86_tune_features[i] = !clear;
3065 fprintf (stderr, "Explicitly %s feature %s\n",
3066 clear ? "clear" : "set", ix86_tune_feature_names[i]);
/* i == X86_TUNE_LAST means the name matched nothing; back up over the
   stripped '^' when reporting the offending token.  */
3070 if (i == X86_TUNE_LAST)
3071 error ("Unknown parameter to option -mtune-ctrl: %s",
3072 clear ? curr_feature_string - 1 : curr_feature_string);
3073 curr_feature_string = next_feature_string;
3075 while (curr_feature_string);
3079 /* Helper function to set ix86_tune_features. IX86_TUNE is the
/* Seeds ix86_tune_features[] from the initial_ix86_tune_features bitmask
   table for the selected processor (or all-zero with -mtune-ctrl no
   defaults), optionally dumps the result, then applies -mtune-ctrl
   overrides via parse_mtune_ctrl_str.  */
3083 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3085 unsigned int ix86_tune_mask = 1u << ix86_tune;
3088 for (i = 0; i < X86_TUNE_LAST; ++i)
3090 if (ix86_tune_no_default)
3091 ix86_tune_features[i] = 0;
3093 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3098 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3099 for (i = 0; i < X86_TUNE_LAST; i++)
3100 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3101 ix86_tune_features[i] ? "on" : "off");
3104 parse_mtune_ctrl_str (dump);
3108 /* Override various settings based on options. If MAIN_ARGS_P, the
3109 options are from the command line, otherwise they are from
3113 ix86_option_override_internal (bool main_args_p,
3114 struct gcc_options *opts,
3115 struct gcc_options *opts_set)
3118 unsigned int ix86_arch_mask;
3119 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3124 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3125 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3126 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3127 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3128 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3129 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3130 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3131 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3132 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3133 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3134 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3135 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3136 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3137 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3138 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3139 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3140 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3141 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3142 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3143 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3144 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3145 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3146 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3147 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3148 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3149 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3150 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3151 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3152 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3153 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3154 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3155 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3156 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3157 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3158 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3159 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3160 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3161 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3162 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3163 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3164 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3165 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3166 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3167 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3168 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3169 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3170 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3171 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3172 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3173 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3174 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3175 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3176 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3177 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3178 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3179 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3180 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
3183 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3184 | PTA_CX16 | PTA_FXSR)
3185 #define PTA_NEHALEM \
3186 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3187 #define PTA_WESTMERE \
3188 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3189 #define PTA_SANDYBRIDGE \
3190 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3191 #define PTA_IVYBRIDGE \
3192 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3193 #define PTA_HASWELL \
3194 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3195 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3196 #define PTA_BROADWELL \
3197 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3199 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3200 #define PTA_BONNELL \
3201 (PTA_CORE2 | PTA_MOVBE)
3202 #define PTA_SILVERMONT \
3203 (PTA_WESTMERE | PTA_MOVBE)
3205 /* if this reaches 64, need to widen struct pta flags below */
3209 const char *const name; /* processor name or nickname. */
3210 const enum processor_type processor;
3211 const enum attr_cpu schedule;
3212 const unsigned HOST_WIDE_INT flags;
3214 const processor_alias_table[] =
3216 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3217 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3218 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3219 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3220 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3221 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3222 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3223 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3224 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3225 PTA_MMX | PTA_SSE | PTA_FXSR},
3226 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3227 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3228 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3229 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3230 PTA_MMX | PTA_SSE | PTA_FXSR},
3231 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3232 PTA_MMX | PTA_SSE | PTA_FXSR},
3233 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3234 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3235 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3236 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3237 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3238 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3239 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3240 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3241 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3242 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3243 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3244 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3245 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3246 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3247 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3248 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3250 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3252 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3254 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3256 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3257 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3258 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3259 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3260 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3261 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3262 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3263 {"knl", PROCESSOR_KNL, CPU_KNL, PTA_KNL},
3264 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3265 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3266 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3267 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3268 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3269 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3270 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3271 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3272 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3273 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3274 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3275 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3276 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3277 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3278 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3279 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3280 {"x86-64", PROCESSOR_K8, CPU_K8,
3281 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3282 {"k8", PROCESSOR_K8, CPU_K8,
3283 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3284 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3285 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3286 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3287 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3288 {"opteron", PROCESSOR_K8, CPU_K8,
3289 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3290 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3291 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3292 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3293 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3294 {"athlon64", PROCESSOR_K8, CPU_K8,
3295 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3296 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3297 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3298 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3299 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3300 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3301 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3302 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3303 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3304 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3305 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3306 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3307 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3308 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3309 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3310 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3311 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3312 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3313 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3314 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3315 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3316 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3317 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3318 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3319 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3320 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3321 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3322 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3323 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3324 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3325 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3326 | PTA_XSAVEOPT | PTA_FSGSBASE},
3327 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3328 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3329 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3330 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3331 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3332 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3333 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3335 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3336 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3337 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3338 | PTA_FXSR | PTA_XSAVE},
3339 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3340 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3341 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3342 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3343 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3344 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3346 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3348 | PTA_HLE /* flags are only used for -march switch. */ },
3351 /* -mrecip options. */
3354 const char *string; /* option name */
3355 unsigned int mask; /* mask bits to set */
3357 const recip_options[] =
3359 { "all", RECIP_MASK_ALL },
3360 { "none", RECIP_MASK_NONE },
3361 { "div", RECIP_MASK_DIV },
3362 { "sqrt", RECIP_MASK_SQRT },
3363 { "vec-div", RECIP_MASK_VEC_DIV },
3364 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3367 int const pta_size = ARRAY_SIZE (processor_alias_table);
3369 /* Set up prefix/suffix so the error messages refer to either the command
3370 line argument, or the attribute(target). */
3379 prefix = "option(\"";
3384 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3385 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3386 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3387 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3388 #ifdef TARGET_BI_ARCH
3391 #if TARGET_BI_ARCH == 1
3392 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3393 is on and OPTION_MASK_ABI_X32 is off. We turn off
3394 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3396 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3397 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3399 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3400 on and OPTION_MASK_ABI_64 is off. We turn off
3401 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3402 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3403 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3404 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3405 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3410 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3412 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3413 OPTION_MASK_ABI_64 for TARGET_X32. */
3414 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3415 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3417 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3418 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3419 | OPTION_MASK_ABI_X32
3420 | OPTION_MASK_ABI_64);
3421 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3423 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3424 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3425 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3426 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3429 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3430 SUBTARGET_OVERRIDE_OPTIONS;
3433 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3434 SUBSUBTARGET_OVERRIDE_OPTIONS;
3437 /* -fPIC is the default for x86_64. */
3438 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3439 opts->x_flag_pic = 2;
3441 /* Need to check -mtune=generic first. */
3442 if (opts->x_ix86_tune_string)
3444 /* As special support for cross compilers we read -mtune=native
3445 as -mtune=generic. With native compilers we won't see the
3446 -mtune=native, as it was changed by the driver. */
3447 if (!strcmp (opts->x_ix86_tune_string, "native"))
3449 opts->x_ix86_tune_string = "generic";
3451 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3452 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3453 "%stune=k8%s or %stune=generic%s instead as appropriate",
3454 prefix, suffix, prefix, suffix, prefix, suffix);
3458 if (opts->x_ix86_arch_string)
3459 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3460 if (!opts->x_ix86_tune_string)
3462 opts->x_ix86_tune_string
3463 = processor_target_table[TARGET_CPU_DEFAULT].name;
3464 ix86_tune_defaulted = 1;
3467 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3468 or defaulted. We need to use a sensible tune option. */
3469 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3471 opts->x_ix86_tune_string = "generic";
3475 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3476 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3478 /* rep; movq isn't available in 32-bit code. */
3479 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3480 opts->x_ix86_stringop_alg = no_stringop;
3483 if (!opts->x_ix86_arch_string)
3484 opts->x_ix86_arch_string
3485 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3486 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3488 ix86_arch_specified = 1;
3490 if (opts_set->x_ix86_pmode)
3492 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3493 && opts->x_ix86_pmode == PMODE_SI)
3494 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3495 && opts->x_ix86_pmode == PMODE_DI))
3496 error ("address mode %qs not supported in the %s bit mode",
3497 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3498 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3501 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3502 ? PMODE_DI : PMODE_SI;
3504 if (!opts_set->x_ix86_abi)
3505 opts->x_ix86_abi = DEFAULT_ABI;
3507 /* For targets using ms ABI enable ms-extensions, if not
3508 explicit turned off. For non-ms ABI we turn off this
3510 if (!opts_set->x_flag_ms_extensions)
3511 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3513 if (opts_set->x_ix86_cmodel)
3515 switch (opts->x_ix86_cmodel)
3519 if (opts->x_flag_pic)
3520 opts->x_ix86_cmodel = CM_SMALL_PIC;
3521 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3522 error ("code model %qs not supported in the %s bit mode",
3528 if (opts->x_flag_pic)
3529 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3530 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3531 error ("code model %qs not supported in the %s bit mode",
3533 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3534 error ("code model %qs not supported in x32 mode",
3540 if (opts->x_flag_pic)
3541 opts->x_ix86_cmodel = CM_LARGE_PIC;
3542 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3543 error ("code model %qs not supported in the %s bit mode",
3545 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3546 error ("code model %qs not supported in x32 mode",
3551 if (opts->x_flag_pic)
3552 error ("code model %s does not support PIC mode", "32");
3553 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3554 error ("code model %qs not supported in the %s bit mode",
3559 if (opts->x_flag_pic)
3561 error ("code model %s does not support PIC mode", "kernel");
3562 opts->x_ix86_cmodel = CM_32;
3564 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3565 error ("code model %qs not supported in the %s bit mode",
3575 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3576 use of rip-relative addressing. This eliminates fixups that
3577 would otherwise be needed if this object is to be placed in a
3578 DLL, and is essentially just as efficient as direct addressing. */
3579 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3580 && (TARGET_RDOS || TARGET_PECOFF))
3581 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3582 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3583 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3585 opts->x_ix86_cmodel = CM_32;
3587 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3589 error ("-masm=intel not supported in this configuration");
3590 opts->x_ix86_asm_dialect = ASM_ATT;
3592 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3593 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3594 sorry ("%i-bit mode not compiled in",
3595 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3597 for (i = 0; i < pta_size; i++)
3598 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3600 ix86_schedule = processor_alias_table[i].schedule;
3601 ix86_arch = processor_alias_table[i].processor;
3602 /* Default cpu tuning to the architecture. */
3603 ix86_tune = ix86_arch;
3605 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3606 && !(processor_alias_table[i].flags & PTA_64BIT))
3607 error ("CPU you selected does not support x86-64 "
3610 if (processor_alias_table[i].flags & PTA_MMX
3611 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3612 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3613 if (processor_alias_table[i].flags & PTA_3DNOW
3614 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3615 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3616 if (processor_alias_table[i].flags & PTA_3DNOW_A
3617 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3618 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3619 if (processor_alias_table[i].flags & PTA_SSE
3620 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3621 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3622 if (processor_alias_table[i].flags & PTA_SSE2
3623 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3624 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3625 if (processor_alias_table[i].flags & PTA_SSE3
3626 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3627 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3628 if (processor_alias_table[i].flags & PTA_SSSE3
3629 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3630 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3631 if (processor_alias_table[i].flags & PTA_SSE4_1
3632 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3633 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3634 if (processor_alias_table[i].flags & PTA_SSE4_2
3635 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3636 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3637 if (processor_alias_table[i].flags & PTA_AVX
3638 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3639 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3640 if (processor_alias_table[i].flags & PTA_AVX2
3641 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3642 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3643 if (processor_alias_table[i].flags & PTA_FMA
3644 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3645 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3646 if (processor_alias_table[i].flags & PTA_SSE4A
3647 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3648 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3649 if (processor_alias_table[i].flags & PTA_FMA4
3650 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3651 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3652 if (processor_alias_table[i].flags & PTA_XOP
3653 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3654 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3655 if (processor_alias_table[i].flags & PTA_LWP
3656 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3657 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3658 if (processor_alias_table[i].flags & PTA_ABM
3659 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3660 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3661 if (processor_alias_table[i].flags & PTA_BMI
3662 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3663 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3664 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3665 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3666 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3667 if (processor_alias_table[i].flags & PTA_TBM
3668 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3669 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3670 if (processor_alias_table[i].flags & PTA_BMI2
3671 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3672 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3673 if (processor_alias_table[i].flags & PTA_CX16
3674 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3675 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3676 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3677 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3678 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3679 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3680 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3681 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3682 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3683 if (processor_alias_table[i].flags & PTA_MOVBE
3684 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3685 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3686 if (processor_alias_table[i].flags & PTA_AES
3687 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3688 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3689 if (processor_alias_table[i].flags & PTA_SHA
3690 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3691 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3692 if (processor_alias_table[i].flags & PTA_PCLMUL
3693 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3694 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3695 if (processor_alias_table[i].flags & PTA_FSGSBASE
3696 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3697 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3698 if (processor_alias_table[i].flags & PTA_RDRND
3699 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3700 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3701 if (processor_alias_table[i].flags & PTA_F16C
3702 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3703 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3704 if (processor_alias_table[i].flags & PTA_RTM
3705 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3706 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3707 if (processor_alias_table[i].flags & PTA_HLE
3708 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3709 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3710 if (processor_alias_table[i].flags & PTA_PRFCHW
3711 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3712 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3713 if (processor_alias_table[i].flags & PTA_RDSEED
3714 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3715 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3716 if (processor_alias_table[i].flags & PTA_ADX
3717 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3718 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3719 if (processor_alias_table[i].flags & PTA_FXSR
3720 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3721 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3722 if (processor_alias_table[i].flags & PTA_XSAVE
3723 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3724 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3725 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3726 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3727 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3728 if (processor_alias_table[i].flags & PTA_AVX512F
3729 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3730 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3731 if (processor_alias_table[i].flags & PTA_AVX512ER
3732 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3733 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3734 if (processor_alias_table[i].flags & PTA_AVX512PF
3735 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3736 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3737 if (processor_alias_table[i].flags & PTA_AVX512CD
3738 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3739 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3740 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3741 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3742 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3743 if (processor_alias_table[i].flags & PTA_PCOMMIT
3744 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
3745 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
3746 if (processor_alias_table[i].flags & PTA_CLWB
3747 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3748 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3749 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3750 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3751 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3752 if (processor_alias_table[i].flags & PTA_XSAVEC
3753 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3754 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3755 if (processor_alias_table[i].flags & PTA_XSAVES
3756 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3757 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3758 if (processor_alias_table[i].flags & PTA_AVX512DQ
3759 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3760 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3761 if (processor_alias_table[i].flags & PTA_AVX512BW
3762 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3763 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3764 if (processor_alias_table[i].flags & PTA_AVX512VL
3765 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3766 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3767 if (processor_alias_table[i].flags & PTA_MPX
3768 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3769 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3770 if (processor_alias_table[i].flags & PTA_AVX512VBMI
3771 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3772 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3773 if (processor_alias_table[i].flags & PTA_AVX512IFMA
3774 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3775 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3776 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3777 x86_prefetch_sse = true;
3782 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3783 error ("Intel MPX does not support x32");
3785 if (TARGET_X32 && (ix86_isa_flags & OPTION_MASK_ISA_MPX))
3786 error ("Intel MPX does not support x32");
3788 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3789 error ("generic CPU can be used only for %stune=%s %s",
3790 prefix, suffix, sw);
3791 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3792 error ("intel CPU can be used only for %stune=%s %s",
3793 prefix, suffix, sw);
3794 else if (i == pta_size)
3795 error ("bad value (%s) for %sarch=%s %s",
3796 opts->x_ix86_arch_string, prefix, suffix, sw);
3798 ix86_arch_mask = 1u << ix86_arch;
3799 for (i = 0; i < X86_ARCH_LAST; ++i)
3800 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3802 for (i = 0; i < pta_size; i++)
3803 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3805 ix86_schedule = processor_alias_table[i].schedule;
3806 ix86_tune = processor_alias_table[i].processor;
3807 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3809 if (!(processor_alias_table[i].flags & PTA_64BIT))
3811 if (ix86_tune_defaulted)
3813 opts->x_ix86_tune_string = "x86-64";
3814 for (i = 0; i < pta_size; i++)
3815 if (! strcmp (opts->x_ix86_tune_string,
3816 processor_alias_table[i].name))
3818 ix86_schedule = processor_alias_table[i].schedule;
3819 ix86_tune = processor_alias_table[i].processor;
3822 error ("CPU you selected does not support x86-64 "
3826 /* Intel CPUs have always interpreted SSE prefetch instructions as
3827 NOPs; so, we can enable SSE prefetch instructions even when
3828 -mtune (rather than -march) points us to a processor that has them.
3829 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3830 higher processors. */
3832 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3833 x86_prefetch_sse = true;
3837 if (ix86_tune_specified && i == pta_size)
3838 error ("bad value (%s) for %stune=%s %s",
3839 opts->x_ix86_tune_string, prefix, suffix, sw);
3841 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3843 #ifndef USE_IX86_FRAME_POINTER
3844 #define USE_IX86_FRAME_POINTER 0
3847 #ifndef USE_X86_64_FRAME_POINTER
3848 #define USE_X86_64_FRAME_POINTER 0
3851 /* Set the default values for switches whose default depends on TARGET_64BIT
3852 in case they weren't overwritten by command line options. */
3853 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3855 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3856 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3857 if (opts->x_flag_asynchronous_unwind_tables
3858 && !opts_set->x_flag_unwind_tables
3859 && TARGET_64BIT_MS_ABI)
3860 opts->x_flag_unwind_tables = 1;
3861 if (opts->x_flag_asynchronous_unwind_tables == 2)
3862 opts->x_flag_unwind_tables
3863 = opts->x_flag_asynchronous_unwind_tables = 1;
3864 if (opts->x_flag_pcc_struct_return == 2)
3865 opts->x_flag_pcc_struct_return = 0;
3869 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3870 opts->x_flag_omit_frame_pointer
3871 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3872 if (opts->x_flag_asynchronous_unwind_tables == 2)
3873 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3874 if (opts->x_flag_pcc_struct_return == 2)
3875 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3878 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3879 /* TODO: ix86_cost should be chosen at instruction or function granuality
3880 so for cold code we use size_cost even in !optimize_size compilation. */
3881 if (opts->x_optimize_size)
3882 ix86_cost = &ix86_size_cost;
3884 ix86_cost = ix86_tune_cost;
3886 /* Arrange to set up i386_stack_locals for all functions. */
3887 init_machine_status = ix86_init_machine_status;
3889 /* Validate -mregparm= value. */
3890 if (opts_set->x_ix86_regparm)
3892 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3893 warning (0, "-mregparm is ignored in 64-bit mode");
3894 if (opts->x_ix86_regparm > REGPARM_MAX)
3896 error ("-mregparm=%d is not between 0 and %d",
3897 opts->x_ix86_regparm, REGPARM_MAX);
3898 opts->x_ix86_regparm = 0;
3901 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3902 opts->x_ix86_regparm = REGPARM_MAX;
3904 /* Default align_* from the processor table. */
3905 if (opts->x_align_loops == 0)
3907 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3908 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3910 if (opts->x_align_jumps == 0)
3912 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3913 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3915 if (opts->x_align_functions == 0)
3917 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3920 /* Provide default for -mbranch-cost= value. */
3921 if (!opts_set->x_ix86_branch_cost)
3922 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
3924 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3926 opts->x_target_flags
3927 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3929 /* Enable by default the SSE and MMX builtins. Do allow the user to
3930 explicitly disable any of these. In particular, disabling SSE and
3931 MMX for kernel code is extremely useful. */
3932 if (!ix86_arch_specified)
3933 opts->x_ix86_isa_flags
3934 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3935 | TARGET_SUBTARGET64_ISA_DEFAULT)
3936 & ~opts->x_ix86_isa_flags_explicit);
3938 if (TARGET_RTD_P (opts->x_target_flags))
3939 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3943 opts->x_target_flags
3944 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3946 if (!ix86_arch_specified)
3947 opts->x_ix86_isa_flags
3948 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3950 /* i386 ABI does not specify red zone. It still makes sense to use it
3951 when programmer takes care to stack from being destroyed. */
3952 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3953 opts->x_target_flags |= MASK_NO_RED_ZONE;
3956 /* Keep nonleaf frame pointers. */
3957 if (opts->x_flag_omit_frame_pointer)
3958 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3959 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3960 opts->x_flag_omit_frame_pointer = 1;
3962 /* If we're doing fast math, we don't care about comparison order
3963 wrt NaNs. This lets us use a shorter comparison sequence. */
3964 if (opts->x_flag_finite_math_only)
3965 opts->x_target_flags &= ~MASK_IEEE_FP;
3967 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3968 since the insns won't need emulation. */
3969 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3970 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3972 /* Likewise, if the target doesn't have a 387, or we've specified
3973 software floating point, don't use 387 inline intrinsics. */
3974 if (!TARGET_80387_P (opts->x_target_flags))
3975 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3977 /* Turn on MMX builtins for -msse. */
3978 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3979 opts->x_ix86_isa_flags
3980 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3982 /* Enable SSE prefetch. */
3983 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3984 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3985 x86_prefetch_sse = true;
3987 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3988 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3989 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3990 opts->x_ix86_isa_flags
3991 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3993 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3994 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3995 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3996 opts->x_ix86_isa_flags
3997 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3999 /* Enable lzcnt instruction for -mabm. */
4000 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
4001 opts->x_ix86_isa_flags
4002 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
4004 /* Validate -mpreferred-stack-boundary= value or default it to
4005 PREFERRED_STACK_BOUNDARY_DEFAULT. */
4006 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4007 if (opts_set->x_ix86_preferred_stack_boundary_arg)
4009 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4010 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
4011 int max = (TARGET_SEH ? 4 : 12);
4013 if (opts->x_ix86_preferred_stack_boundary_arg < min
4014 || opts->x_ix86_preferred_stack_boundary_arg > max)
4017 error ("-mpreferred-stack-boundary is not supported "
4020 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4021 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4024 ix86_preferred_stack_boundary
4025 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
4028 /* Set the default value for -mstackrealign. */
4029 if (opts->x_ix86_force_align_arg_pointer == -1)
4030 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4032 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4034 /* Validate -mincoming-stack-boundary= value or default it to
4035 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4036 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4037 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4039 if (opts->x_ix86_incoming_stack_boundary_arg
4040 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
4041 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4042 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4043 opts->x_ix86_incoming_stack_boundary_arg,
4044 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
4047 ix86_user_incoming_stack_boundary
4048 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4049 ix86_incoming_stack_boundary
4050 = ix86_user_incoming_stack_boundary;
4054 #ifndef NO_PROFILE_COUNTERS
4055 if (flag_nop_mcount)
4056 error ("-mnop-mcount is not compatible with this target");
4058 if (flag_nop_mcount && flag_pic)
4059 error ("-mnop-mcount is not implemented for -fPIC");
4061 /* Accept -msseregparm only if at least SSE support is enabled. */
4062 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4063 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4064 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4066 if (opts_set->x_ix86_fpmath)
4068 if (opts->x_ix86_fpmath & FPMATH_SSE)
4070 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4072 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4073 opts->x_ix86_fpmath = FPMATH_387;
4075 else if ((opts->x_ix86_fpmath & FPMATH_387)
4076 && !TARGET_80387_P (opts->x_target_flags))
4078 warning (0, "387 instruction set disabled, using SSE arithmetics");
4079 opts->x_ix86_fpmath = FPMATH_SSE;
4083 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4084 fpmath=387. The second is however default at many targets since the
4085 extra 80bit precision of temporaries is considered to be part of ABI.
4086 Overwrite the default at least for -ffast-math.
4087 TODO: -mfpmath=both seems to produce same performing code with bit
4088 smaller binaries. It is however not clear if register allocation is
4089 ready for this setting.
4090 Also -mfpmath=387 is overall a lot more compact (bout 4-5%) than SSE
4091 codegen. We may switch to 387 with -ffast-math for size optimized
4093 else if (fast_math_flags_set_p (&global_options)
4094 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4095 opts->x_ix86_fpmath = FPMATH_SSE;
4097 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4099 /* If the i387 is disabled, then do not return values in it. */
4100 if (!TARGET_80387_P (opts->x_target_flags))
4101 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4103 /* Use external vectorized library in vectorizing intrinsics. */
4104 if (opts_set->x_ix86_veclibabi_type)
4105 switch (opts->x_ix86_veclibabi_type)
4107 case ix86_veclibabi_type_svml:
4108 ix86_veclib_handler = ix86_veclibabi_svml;
4111 case ix86_veclibabi_type_acml:
4112 ix86_veclib_handler = ix86_veclibabi_acml;
4119 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4120 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4121 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4123 /* If stack probes are required, the space used for large function
4124 arguments on the stack must also be probed, so enable
4125 -maccumulate-outgoing-args so this happens in the prologue. */
4126 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4127 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4129 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4130 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4131 "for correctness", prefix, suffix);
4132 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4135 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4138 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4139 p = strchr (internal_label_prefix, 'X');
4140 internal_label_prefix_len = p - internal_label_prefix;
4144 /* When scheduling description is not available, disable scheduler pass
4145 so it won't slow down the compilation and make x87 code slower. */
4146 if (!TARGET_SCHEDULE)
4147 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4149 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4150 ix86_tune_cost->simultaneous_prefetches,
4151 opts->x_param_values,
4152 opts_set->x_param_values);
4153 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4154 ix86_tune_cost->prefetch_block,
4155 opts->x_param_values,
4156 opts_set->x_param_values);
4157 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4158 ix86_tune_cost->l1_cache_size,
4159 opts->x_param_values,
4160 opts_set->x_param_values);
4161 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4162 ix86_tune_cost->l2_cache_size,
4163 opts->x_param_values,
4164 opts_set->x_param_values);
4166 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
4167 if (opts->x_flag_prefetch_loop_arrays < 0
4169 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4170 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4171 opts->x_flag_prefetch_loop_arrays = 1;
4173 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4174 can be opts->x_optimized to ap = __builtin_next_arg (0). */
4175 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4176 targetm.expand_builtin_va_start = NULL;
4178 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4180 ix86_gen_leave = gen_leave_rex64;
4181 if (Pmode == DImode)
4183 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4184 ix86_gen_tls_local_dynamic_base_64
4185 = gen_tls_local_dynamic_base_64_di;
4189 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4190 ix86_gen_tls_local_dynamic_base_64
4191 = gen_tls_local_dynamic_base_64_si;
4195 ix86_gen_leave = gen_leave;
4197 if (Pmode == DImode)
4199 ix86_gen_add3 = gen_adddi3;
4200 ix86_gen_sub3 = gen_subdi3;
4201 ix86_gen_sub3_carry = gen_subdi3_carry;
4202 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4203 ix86_gen_andsp = gen_anddi3;
4204 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4205 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4206 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4207 ix86_gen_monitor = gen_sse3_monitor_di;
4211 ix86_gen_add3 = gen_addsi3;
4212 ix86_gen_sub3 = gen_subsi3;
4213 ix86_gen_sub3_carry = gen_subsi3_carry;
4214 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4215 ix86_gen_andsp = gen_andsi3;
4216 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4217 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4218 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4219 ix86_gen_monitor = gen_sse3_monitor_si;
4223 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4224 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4225 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4228 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4230 if (opts->x_flag_fentry > 0)
4231 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4233 opts->x_flag_fentry = 0;
4235 else if (TARGET_SEH)
4237 if (opts->x_flag_fentry == 0)
4238 sorry ("-mno-fentry isn%'t compatible with SEH");
4239 opts->x_flag_fentry = 1;
4241 else if (opts->x_flag_fentry < 0)
4243 #if defined(PROFILE_BEFORE_PROLOGUE)
4244 opts->x_flag_fentry = 1;
4246 opts->x_flag_fentry = 0;
4250 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
4251 opts->x_target_flags |= MASK_VZEROUPPER;
4252 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4253 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4254 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4255 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4256 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4257 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4258 /* Enable 128-bit AVX instruction generation
4259 for the auto-vectorizer. */
4260 if (TARGET_AVX128_OPTIMAL
4261 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4262 opts->x_target_flags |= MASK_PREFER_AVX128;
4264 if (opts->x_ix86_recip_name)
4266 char *p = ASTRDUP (opts->x_ix86_recip_name);
4268 unsigned int mask, i;
4271 while ((q = strtok (p, ",")) != NULL)
4282 if (!strcmp (q, "default"))
4283 mask = RECIP_MASK_ALL;
4286 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4287 if (!strcmp (q, recip_options[i].string))
4289 mask = recip_options[i].mask;
4293 if (i == ARRAY_SIZE (recip_options))
4295 error ("unknown option for -mrecip=%s", q);
4297 mask = RECIP_MASK_NONE;
4301 opts->x_recip_mask_explicit |= mask;
4303 opts->x_recip_mask &= ~mask;
4305 opts->x_recip_mask |= mask;
4309 if (TARGET_RECIP_P (opts->x_target_flags))
4310 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4311 else if (opts_set->x_target_flags & MASK_RECIP)
4312 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4314 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4315 for 64-bit Bionic. */
4316 if (TARGET_HAS_BIONIC
4317 && !(opts_set->x_target_flags
4318 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4319 opts->x_target_flags |= (TARGET_64BIT
4320 ? MASK_LONG_DOUBLE_128
4321 : MASK_LONG_DOUBLE_64);
4323 /* Only one of them can be active. */
4324 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4325 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4327 /* Save the initial options in case the user does function specific
4330 target_option_default_node = target_option_current_node
4331 = build_target_option_node (opts);
4333 /* Handle stack protector */
4334 if (!opts_set->x_ix86_stack_protector_guard)
4335 opts->x_ix86_stack_protector_guard
4336 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4338 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4339 if (opts->x_ix86_tune_memcpy_strategy)
4341 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4342 ix86_parse_stringop_strategy_string (str, false);
4346 if (opts->x_ix86_tune_memset_strategy)
4348 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4349 ix86_parse_stringop_strategy_string (str, true);
4354 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4357 ix86_option_override (void)
4359 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4360 struct register_pass_info insert_vzeroupper_info
4361 = { pass_insert_vzeroupper, "reload",
4362 1, PASS_POS_INSERT_AFTER
4365 ix86_option_override_internal (true, &global_options, &global_options_set);
4368 /* This needs to be done at start up. It's convenient to do it here. */
4369 register_pass (&insert_vzeroupper_info);
4372 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4374 ix86_offload_options (void)
4377 return xstrdup ("-foffload-abi=lp64");
4378 return xstrdup ("-foffload-abi=ilp32");
4381 /* Update register usage after having seen the compiler flags. */
4384 ix86_conditional_register_usage (void)
4389 /* The PIC register, if it exists, is fixed. */
4390 j = PIC_OFFSET_TABLE_REGNUM;
4391 if (j != INVALID_REGNUM)
4392 fixed_regs[j] = call_used_regs[j] = 1;
4394 /* For 32-bit targets, squash the REX registers. */
4397 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4398 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4399 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4400 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4401 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4402 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4405 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4406 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4407 : TARGET_64BIT ? (1 << 2)
4410 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4412 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4414 /* Set/reset conditionally defined registers from
4415 CALL_USED_REGISTERS initializer. */
4416 if (call_used_regs[i] > 1)
4417 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4419 /* Calculate registers of CLOBBERED_REGS register set
4420 as call used registers from GENERAL_REGS register set. */
4421 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4422 && call_used_regs[i])
4423 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4426 /* If MMX is disabled, squash the registers. */
4428 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4429 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4430 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4432 /* If SSE is disabled, squash the registers. */
4434 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4435 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4436 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4438 /* If the FPU is disabled, squash the registers. */
4439 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4440 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4441 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4442 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4444 /* If AVX512F is disabled, squash the registers. */
4445 if (! TARGET_AVX512F)
4447 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4448 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4450 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4451 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4454 /* If MPX is disabled, squash the registers. */
4456 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4457 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4461 /* Save the current options */
/* NOTE(review): the leading numbers on each line are artifacts of a lossy
   extraction; the return-type line, braces and some statements are missing
   from this listing.  Do not treat it as compilable source.  */
/* TARGET_OPTION_SAVE hook: copy the current ix86 target state (both the
   backend globals and the x_* fields of OPTS) into PTR so that
   ix86_function_specific_restore can reinstate it later, e.g. around
   attribute((target(...))) processing.  */
4464 ix86_function_specific_save (struct cl_target_option *ptr,
4465 struct gcc_options *opts)
/* Backend globals that are not mirrored in struct gcc_options are saved
   directly from the global variables.  */
4467 ptr->arch = ix86_arch;
4468 ptr->schedule = ix86_schedule;
4469 ptr->prefetch_sse = x86_prefetch_sse;
4470 ptr->tune = ix86_tune;
4471 ptr->branch_cost = ix86_branch_cost;
4472 ptr->tune_defaulted = ix86_tune_defaulted;
4473 ptr->arch_specified = ix86_arch_specified;
/* Option variables living in struct gcc_options are copied
   field-for-field from OPTS.  This list must mirror the one in
   ix86_function_specific_restore.  */
4474 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4475 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4476 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4477 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4478 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4479 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4480 ptr->x_ix86_abi = opts->x_ix86_abi;
4481 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4482 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4483 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4484 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4485 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4486 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4487 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4488 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4489 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4490 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4491 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4492 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4493 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4494 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4495 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4496 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4497 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4498 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4499 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4500 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4502 /* The fields are char but the variables are not; make sure the
4503 values fit in the fields. */
/* These asserts catch silent narrowing: ptr->arch etc. are declared
   narrower than the enum globals they were copied from above.  */
4504 gcc_assert (ptr->arch == ix86_arch);
4505 gcc_assert (ptr->schedule == ix86_schedule);
4506 gcc_assert (ptr->tune == ix86_tune);
4507 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4510 /* Restore the current options */
/* TARGET_OPTION_RESTORE hook: the inverse of ix86_function_specific_save.
   Reinstates the target state captured in PTR into the backend globals and
   OPTS, then re-derives the state that depends on arch/tune.
   NOTE(review): lossy listing — braces, the `int i;` declaration and an
   `else` before line 4564 appear to be elided between the numbered lines.  */
4513 ix86_function_specific_restore (struct gcc_options *opts,
4514 struct cl_target_option *ptr)
/* Remember the previous arch/tune so we only recompute the derived
   feature tables below when they actually changed.  */
4516 enum processor_type old_tune = ix86_tune;
4517 enum processor_type old_arch = ix86_arch;
4518 unsigned int ix86_arch_mask;
4521 /* We don't change -fPIC. */
4522 opts->x_flag_pic = flag_pic;
4524 ix86_arch = (enum processor_type) ptr->arch;
4525 ix86_schedule = (enum attr_cpu) ptr->schedule;
4526 ix86_tune = (enum processor_type) ptr->tune;
4527 x86_prefetch_sse = ptr->prefetch_sse;
4528 opts->x_ix86_branch_cost = ptr->branch_cost;
4529 ix86_tune_defaulted = ptr->tune_defaulted;
4530 ix86_arch_specified = ptr->arch_specified;
/* Field-for-field copy back into OPTS; mirrors the list in
   ix86_function_specific_save.  */
4531 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4532 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4533 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4534 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4535 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4536 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4537 opts->x_ix86_abi = ptr->x_ix86_abi;
4538 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4539 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4540 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4541 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4542 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4543 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4544 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4545 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4546 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4547 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4548 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4549 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4550 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4551 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4552 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4553 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4554 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4555 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4556 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4557 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
/* Re-derive the cost tables for the (possibly new) tune target.  */
4558 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4559 /* TODO: ix86_cost should be chosen at instruction or function granuality
4560 so for cold code we use size_cost even in !optimize_size compilation. */
4561 if (opts->x_optimize_size)
4562 ix86_cost = &ix86_size_cost;
4564 ix86_cost = ix86_tune_cost;
4566 /* Recreate the arch feature tests if the arch changed */
4567 if (old_arch != ix86_arch)
4569 ix86_arch_mask = 1u << ix86_arch;
4570 for (i = 0; i < X86_ARCH_LAST; ++i)
4571 ix86_arch_features[i]
4572 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4575 /* Recreate the tune optimization tests */
4576 if (old_tune != ix86_tune)
4577 set_ix86_tune_features (ix86_tune, false);
4580 /* Print the current options */
/* TARGET_OPTION_PRINT hook: dump a human-readable description of the
   target options in PTR to FILE, indented by INDENT columns.  Used by
   -fdump-* machinery.  NOTE(review): lossy listing — the declaration of
   target_string and some lines are elided.  */
4583 ix86_function_specific_print (FILE *file, int indent,
4584 struct cl_target_option *ptr)
/* Reconstruct the equivalent command-line string (caller must free).  */
4587 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4588 NULL, NULL, ptr->x_ix86_fpmath, false);
/* Saved arch/tune are indices into processor_target_table; assert they
   are in range before printing the table entry's name.  */
4590 gcc_assert (ptr->arch < PROCESSOR_max);
4591 fprintf (file, "%*sarch = %d (%s)\n",
4593 ptr->arch, processor_target_table[ptr->arch].name);
4595 gcc_assert (ptr->tune < PROCESSOR_max);
4596 fprintf (file, "%*stune = %d (%s)\n",
4598 ptr->tune, processor_target_table[ptr->tune].name);
4600 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4604 fprintf (file, "%*s%s\n", indent, "", target_string);
4605 free (target_string);
4610 /* Inner function to process the attribute((target(...))), take an argument and
4611 set the current options from the argument. If we have a list, recursively go
/* Parses one attribute((target("..."))) argument (or recurses over a
   TREE_LIST of them) and applies each comma-separated option to
   OPTS/OPTS_SET.  String-valued options (arch=/tune=) are stashed in
   P_STRINGS; enum options are recorded in ENUM_OPTS_SET.  Returns a
   success flag (elided from this lossy listing along with braces and
   several declarations).  */
4615 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4616 struct gcc_options *opts,
4617 struct gcc_options *opts_set,
4618 struct gcc_options *enum_opts_set)
/* Table-entry constructors: each expands to { string, strlen, kind,
   option/index, mask } for the attrs[] table below.  */
4623 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4624 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4625 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4626 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4627 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4643 enum ix86_opt_type type;
/* ISA options handled through the common -m<isa> machinery.  */
4648 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4649 IX86_ATTR_ISA ("abm", OPT_mabm),
4650 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4651 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4652 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4653 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4654 IX86_ATTR_ISA ("aes", OPT_maes),
4655 IX86_ATTR_ISA ("sha", OPT_msha),
4656 IX86_ATTR_ISA ("avx", OPT_mavx),
4657 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4658 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4659 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4660 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4661 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4662 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4663 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4664 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4665 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4666 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4667 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4668 IX86_ATTR_ISA ("sse", OPT_msse),
4669 IX86_ATTR_ISA ("sse2", OPT_msse2),
4670 IX86_ATTR_ISA ("sse3", OPT_msse3),
4671 IX86_ATTR_ISA ("sse4", OPT_msse4),
4672 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4673 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4674 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4675 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4676 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4677 IX86_ATTR_ISA ("fma", OPT_mfma),
4678 IX86_ATTR_ISA ("xop", OPT_mxop),
4679 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4680 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4681 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4682 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4683 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4684 IX86_ATTR_ISA ("hle", OPT_mhle),
4685 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4686 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4687 IX86_ATTR_ISA ("adx", OPT_madx),
4688 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4689 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4690 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4691 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4692 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4693 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4694 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4695 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
4696 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
4697 IX86_ATTR_ISA ("clwb", OPT_mclwb),
4698 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
/* Enum option: fpmath=sse|387|both.  */
4701 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4703 /* string options */
4704 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4705 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
/* Boolean flag options mapping to target_flags mask bits.  */
4708 IX86_ATTR_YES ("cld",
4712 IX86_ATTR_NO ("fancy-math-387",
4713 OPT_mfancy_math_387,
4714 MASK_NO_FANCY_MATH_387),
4716 IX86_ATTR_YES ("ieee-fp",
4720 IX86_ATTR_YES ("inline-all-stringops",
4721 OPT_minline_all_stringops,
4722 MASK_INLINE_ALL_STRINGOPS),
4724 IX86_ATTR_YES ("inline-stringops-dynamically",
4725 OPT_minline_stringops_dynamically,
4726 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4728 IX86_ATTR_NO ("align-stringops",
4729 OPT_mno_align_stringops,
4730 MASK_NO_ALIGN_STRINGOPS),
4732 IX86_ATTR_YES ("recip",
4738 /* If this is a list, recurse to get the options. */
4739 if (TREE_CODE (args) == TREE_LIST)
4743 for (; args; args = TREE_CHAIN (args))
4744 if (TREE_VALUE (args)
4745 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4746 p_strings, opts, opts_set,
4753 else if (TREE_CODE (args) != STRING_CST)
4755 error ("attribute %<target%> argument not a string");
4759 /* Handle multiple arguments separated by commas. */
4760 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4762 while (next_optstr && *next_optstr != '\0')
4764 char *p = next_optstr;
4766 char *comma = strchr (next_optstr, ',');
4767 const char *opt_string;
4768 size_t len, opt_len;
4773 enum ix86_opt_type type = ix86_opt_unknown;
/* Split off the next comma-delimited token (the no-comma branch is
   elided from this listing).  */
4779 len = comma - next_optstr;
4780 next_optstr = comma + 1;
4788 /* Recognize no-xxx. */
4789 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4798 /* Find the option. */
/* Linear scan of the attrs[] table; first character is compared before
   the memcmp as a cheap filter.  */
4801 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4803 type = attrs[i].type;
4804 opt_len = attrs[i].len;
4805 if (ch == attrs[i].string[0]
4806 && ((type != ix86_opt_str && type != ix86_opt_enum)
4809 && memcmp (p, attrs[i].string, opt_len) == 0)
4812 mask = attrs[i].mask;
4813 opt_string = attrs[i].string;
4818 /* Process the option. */
4821 error ("attribute(target(\"%s\")) is unknown", orig_p);
/* ISA options are routed through the same handler as -m<isa> on the
   command line so the implied-ISA logic is shared.  */
4825 else if (type == ix86_opt_isa)
4827 struct cl_decoded_option decoded;
4829 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4830 ix86_handle_option (opts, opts_set,
4831 &decoded, input_location);
4834 else if (type == ix86_opt_yes || type == ix86_opt_no)
4836 if (type == ix86_opt_no)
4837 opt_set_p = !opt_set_p;
4840 opts->x_target_flags |= mask;
4842 opts->x_target_flags &= ~mask;
/* String options (arch=/tune=) may only be given once; the value is
   duplicated because P points into scratch obstack memory.  */
4845 else if (type == ix86_opt_str)
4849 error ("option(\"%s\") was already specified", opt_string);
4853 p_strings[opt] = xstrdup (p + opt_len);
4856 else if (type == ix86_opt_enum)
4861 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4863 set_option (opts, enum_opts_set, opt, value,
4864 p + opt_len, DK_UNSPECIFIED, input_location,
4868 error ("attribute(target(\"%s\")) is unknown", orig_p);
4880 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
/* Parses ARGS (the attribute((target(...))) payload) into OPTS/OPTS_SET,
   reruns the option override machinery if anything changed, and returns
   the resulting target-option node — or error_mark_node on a parse error.
   NOTE(review): lossy listing — declarations of `t`, `i` and several
   braces/returns are elided between the numbered lines.  */
4883 ix86_valid_target_attribute_tree (tree args,
4884 struct gcc_options *opts,
4885 struct gcc_options *opts_set)
/* Remember the incoming arch/tune/fpmath so they can be restored after
   building the option node (the attribute must not leak into the
   surrounding compilation state).  */
4887 const char *orig_arch_string = opts->x_ix86_arch_string;
4888 const char *orig_tune_string = opts->x_ix86_tune_string;
4889 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4890 int orig_tune_defaulted = ix86_tune_defaulted;
4891 int orig_arch_specified = ix86_arch_specified;
4892 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4895 struct cl_target_option *def
4896 = TREE_TARGET_OPTION (target_option_default_node);
4897 struct gcc_options enum_opts_set;
4899 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4901 /* Process each of the options on the chain. */
4902 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4903 opts_set, &enum_opts_set))
4904 return error_mark_node;
4906 /* If the changed options are different from the default, rerun
4907 ix86_option_override_internal, and then save the options away.
4908 The string options are are attribute options, and will be undone
4909 when we copy the save structure. */
4910 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4911 || opts->x_target_flags != def->x_target_flags
4912 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4913 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4914 || enum_opts_set.x_ix86_fpmath)
4916 /* If we are using the default tune= or arch=, undo the string assigned,
4917 and use the default. */
4918 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4919 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4920 else if (!orig_arch_specified)
4921 opts->x_ix86_arch_string = NULL;
4923 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4924 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4925 else if (orig_tune_defaulted)
4926 opts->x_ix86_tune_string = NULL;
4928 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4929 if (enum_opts_set.x_ix86_fpmath)
4930 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4931 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4932 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4934 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4935 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4938 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4939 ix86_option_override_internal (false, opts, opts_set);
4941 /* Add any builtin functions with the new isa if any. */
4942 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4944 /* Save the current options unless we are validating options for
4946 t = build_target_option_node (opts);
/* Restore the caller-visible strings saved at function entry.  */
4948 opts->x_ix86_arch_string = orig_arch_string;
4949 opts->x_ix86_tune_string = orig_tune_string;
4950 opts_set->x_ix86_fpmath = orig_fpmath_set;
4952 /* Free up memory allocated to hold the strings */
4953 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4954 free (option_strings[i]);
4960 /* Hook to validate attribute((target("string"))). */
/* TARGET_OPTION_VALID_ATTRIBUTE_P hook: builds a fresh gcc_options
   struct seeded from FNDECL's optimization level and the default target
   options, parses ARGS into it via ix86_valid_target_attribute_tree, and
   attaches the resulting target/optimization nodes to FNDECL.
   NOTE(review): lossy listing — the `args` parameter line, braces and
   return statements are elided.  */
4963 ix86_valid_target_attribute_p (tree fndecl,
4964 tree ARG_UNUSED (name),
4966 int ARG_UNUSED (flags))
4968 struct gcc_options func_options;
4969 tree new_target, new_optimize;
4972 /* attribute((target("default"))) does nothing, beyond
4973 affecting multi-versioning. */
4974 if (TREE_VALUE (args)
4975 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4976 && TREE_CHAIN (args) == NULL_TREE
4977 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4980 tree old_optimize = build_optimization_node (&global_options);
4982 /* Get the optimization options of the current function. */
4983 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
/* Fall back to the global optimization options if the function has
   none of its own.  */
4986 func_optimize = old_optimize;
4988 /* Init func_options. */
4989 memset (&func_options, 0, sizeof (func_options));
4990 init_options_struct (&func_options, NULL);
4991 lang_hooks.init_options_struct (&func_options);
4993 cl_optimization_restore (&func_options,
4994 TREE_OPTIMIZATION (func_optimize));
4996 /* Initialize func_options to the default before its target options can
4998 cl_target_option_restore (&func_options,
4999 TREE_TARGET_OPTION (target_option_default_node));
5001 new_target = ix86_valid_target_attribute_tree (args, &func_options,
5002 &global_options_set);
5004 new_optimize = build_optimization_node (&func_options);
5006 if (new_target == error_mark_node)
5009 else if (fndecl && new_target)
5011 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
/* Only record a per-function optimization node when it differs from
   the one in effect before parsing.  */
5013 if (old_optimize != new_optimize)
5014 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5021 /* Hook to determine if one function can safely inline another. */
/* TARGET_CAN_INLINE_P hook: CALLEE may be inlined into CALLER only when
   the callee's ISA flags are a subset of the caller's and all other
   target options (flags, arch, tune, fpmath, branch cost) match.
   NOTE(review): lossy listing — braces and the `ret = true/false;`
   assignments between the conditions are elided.  */
5024 ix86_can_inline_p (tree caller, tree callee)
5027 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5028 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5030 /* If callee has no option attributes, then it is ok to inline. */
5034 /* If caller has no option attributes, but callee does then it is not ok to
5036 else if (!caller_tree)
5041 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5042 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5044 /* Callee's isa options should a subset of the caller's, i.e. a SSE4 function
5045 can inline a SSE2 function but a SSE2 function can't inline a SSE4
5047 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5048 != callee_opts->x_ix86_isa_flags)
5051 /* See if we have the same non-isa options. */
5052 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5055 /* See if arch, tune, etc. are the same. */
5056 else if (caller_opts->arch != callee_opts->arch)
5059 else if (caller_opts->tune != callee_opts->tune)
5062 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5065 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5076 /* Remember the last target of ix86_set_current_function. */
/* Cache used by ix86_set_current_function to avoid redundant target
   reinitialization; GTY(()) registers it with GCC's garbage collector.  */
5077 static GTY(()) tree ix86_previous_fndecl;
5079 /* Set target globals to default. */
/* Restore global_options and the target-globals blob from
   target_option_current_node.  NOTE(review): lossy listing — `old_tree`
   is declared but its use, plus the guarding condition and braces, are
   elided between the numbered lines.  */
5082 ix86_reset_to_default_globals (void)
5084 tree old_tree = (ix86_previous_fndecl
5085 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
5090 tree new_tree = target_option_current_node;
5091 cl_target_option_restore (&global_options,
5092 TREE_TARGET_OPTION (new_tree));
/* Prefer a target_globals blob already cached on the node; the default
   node reuses the shared default blob; otherwise build and cache one.  */
5093 if (TREE_TARGET_GLOBALS (new_tree))
5094 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5095 else if (new_tree == target_option_default_node)
5096 restore_target_globals (&default_target_globals);
5098 TREE_TARGET_GLOBALS (new_tree)
5099 = save_target_globals_default_opts ();
5103 /* Invalidate ix86_previous_fndecl cache. */
/* Drops the cached fndecl after restoring the default target globals so
   the next ix86_set_current_function call re-establishes state.  */
5105 ix86_reset_previous_fndecl (void)
5107 ix86_reset_to_default_globals ();
5108 ix86_previous_fndecl = NULL_TREE;
5111 /* Establish appropriate back-end context for processing the function
5112 FNDECL. The argument might be NULL to indicate processing at top
5113 level, outside of any function scope. */
/* TARGET_SET_CURRENT_FUNCTION hook.  Compares FNDECL's target-option
   node against the previously active one and only switches the global
   target state when they differ.  NOTE(review): lossy listing — braces
   and the fallback expressions of the two conditionals are elided.  */
5115 ix86_set_current_function (tree fndecl)
5117 /* Only change the context if the function changes. This hook is called
5118 several times in the course of compiling a function, and we don't want to
5119 slow things down too much or call target_reinit when it isn't safe. */
5120 if (fndecl && fndecl != ix86_previous_fndecl)
5122 tree old_tree = (ix86_previous_fndecl
5123 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
5126 tree new_tree = (fndecl
5127 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
/* Same target node as last time: nothing to switch.  */
5130 if (old_tree == new_tree)
/* New function carries non-default target options: restore them and
   install (building and caching if needed) its target-globals blob.  */
5133 else if (new_tree && new_tree != target_option_default_node)
5135 cl_target_option_restore (&global_options,
5136 TREE_TARGET_OPTION (new_tree));
5137 if (TREE_TARGET_GLOBALS (new_tree))
5138 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5140 TREE_TARGET_GLOBALS (new_tree)
5141 = save_target_globals_default_opts ();
/* Previous function had non-default options but this one does not:
   fall back to the defaults.  */
5144 else if (old_tree && old_tree != target_option_default_node)
5145 ix86_reset_to_default_globals ();
5146 ix86_previous_fndecl = fndecl;
5151 /* Return true if this goes in large data/bss. */
/* Predicate used by the x86-64 medium-model section hooks below: decides
   whether EXP should live in the far .ldata/.lbss sections.  Only
   relevant for -mcmodel=medium[-pic]; functions, locals, and objects no
   larger than -mlarge-data-threshold stay in normal sections.
   NOTE(review): lossy listing — return statements and braces elided.  */
5154 ix86_in_large_data_p (tree exp)
5156 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5159 /* Functions are never large data. */
5160 if (TREE_CODE (exp) == FUNCTION_DECL)
5163 /* Automatic variables are never large data. */
5164 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
/* An explicit section attribute naming .ldata/.lbss forces large.  */
5167 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5169 const char *section = DECL_SECTION_NAME (exp);
5170 if (strcmp (section, ".ldata") == 0
5171 || strcmp (section, ".lbss") == 0)
5177 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5179 /* If this is an incomplete type with size 0, then we can't put it
5180 in data because it might be too big when completed. Also,
5181 int_size_in_bytes returns -1 if size can vary or is larger than
5182 an integer in which case also it is safer to assume that it goes in
5184 if (size <= 0 || size > ix86_section_threshold)
5191 /* Switch to the appropriate section for output of DECL.
5192 DECL is either a `VAR_DECL' node or a constant of some sort.
5193 RELOC indicates whether forming the initial value of DECL requires
5194 link-time relocations. */
/* TARGET_ASM_SELECT_SECTION for x86-64 ELF: large-model data gets an
   ".l"-prefixed section matching its categorization; everything else
   defers to default_elf_select_section.  NOTE(review): lossy listing —
   `switch` case labels, `break;`s and braces are partially elided.  */
5196 ATTRIBUTE_UNUSED static section *
5197 x86_64_elf_select_section (tree decl, int reloc,
5198 unsigned HOST_WIDE_INT align)
5200 if (ix86_in_large_data_p (decl))
5202 const char *sname = NULL;
5203 unsigned int flags = SECTION_WRITE;
5204 switch (categorize_decl_for_section (decl, reloc))
5209 case SECCAT_DATA_REL:
5210 sname = ".ldata.rel";
5212 case SECCAT_DATA_REL_LOCAL:
5213 sname = ".ldata.rel.local";
5215 case SECCAT_DATA_REL_RO:
5216 sname = ".ldata.rel.ro";
5218 case SECCAT_DATA_REL_RO_LOCAL:
5219 sname = ".ldata.rel.ro.local";
/* BSS-category large data additionally needs the BSS flag.  */
5223 flags |= SECTION_BSS;
5226 case SECCAT_RODATA_MERGE_STR:
5227 case SECCAT_RODATA_MERGE_STR_INIT:
5228 case SECCAT_RODATA_MERGE_CONST:
5232 case SECCAT_SRODATA:
5239 /* We don't split these for medium model. Place them into
5240 default sections and hope for best. */
5245 /* We might get called with string constants, but get_named_section
5246 doesn't like them as they are not DECLs. Also, we need to set
5247 flags in that case. */
5249 return get_section (sname, flags, NULL);
5250 return get_named_section (decl, sname, reloc);
5253 return default_elf_select_section (decl, reloc, align);
5256 /* Select a set of attributes for section NAME based on the properties
5257 of DECL and whether or not RELOC indicates that DECL's initializer
5258 might contain runtime relocations. */
/* TARGET_SECTION_TYPE_FLAGS for x86-64 ELF: start from the default flags,
   then add RELRO for the read-only large-data relocation sections and BSS
   for the large-BSS sections.  */
5260 static unsigned int ATTRIBUTE_UNUSED
5261 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5263 unsigned int flags = default_section_type_flags (decl, name, reloc);
5265 if (decl == NULL_TREE
5266 && (strcmp (name, ".ldata.rel.ro") == 0
5267 || strcmp (name, ".ldata.rel.ro.local") == 0))
5268 flags |= SECTION_RELRO;
/* NOTE(review): the strncmp lengths are one short of the literals —
   ".lbss." has 6 chars and ".gnu.linkonce.lb." has 17 — so these match
   the prefixes ".lbss" and ".gnu.linkonce.lb" (without the trailing
   dot).  Harmless in practice but worth confirming upstream.  */
5270 if (strcmp (name, ".lbss") == 0
5271 || strncmp (name, ".lbss.", 5) == 0
5272 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5273 flags |= SECTION_BSS;
5278 /* Build up a unique section name, expressed as a
5279 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5280 RELOC indicates whether the initial value of EXP requires
5281 link-time relocations. */
/* TARGET_ASM_UNIQUE_SECTION for x86-64 ELF: for large-model data, build a
   per-decl section name from an ".l"-prefix (short form for one-only
   sections), optionally under ".gnu.linkonce"; otherwise defer to
   default_unique_section.  NOTE(review): lossy listing — case labels,
   `break;`s, the `char *string;` declaration and braces are elided.  */
5283 static void ATTRIBUTE_UNUSED
5284 x86_64_elf_unique_section (tree decl, int reloc)
5286 if (ix86_in_large_data_p (decl))
5288 const char *prefix = NULL;
5289 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5290 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5292 switch (categorize_decl_for_section (decl, reloc))
5295 case SECCAT_DATA_REL:
5296 case SECCAT_DATA_REL_LOCAL:
5297 case SECCAT_DATA_REL_RO:
5298 case SECCAT_DATA_REL_RO_LOCAL:
5299 prefix = one_only ? ".ld" : ".ldata";
5302 prefix = one_only ? ".lb" : ".lbss";
5305 case SECCAT_RODATA_MERGE_STR:
5306 case SECCAT_RODATA_MERGE_STR_INIT:
5307 case SECCAT_RODATA_MERGE_CONST:
5308 prefix = one_only ? ".lr" : ".lrodata";
5310 case SECCAT_SRODATA:
5317 /* We don't split these for medium model. Place them into
5318 default sections and hope for best. */
5323 const char *name, *linkonce;
/* Use the decl's mangled assembler name, with any target-specific
   encoding prefix stripped, as the unique suffix.  */
5326 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5327 name = targetm.strip_name_encoding (name);
5329 /* If we're using one_only, then there needs to be a .gnu.linkonce
5330 prefix to the section name. */
5331 linkonce = one_only ? ".gnu.linkonce" : "";
5333 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5335 set_decl_section_name (decl, string);
5339 default_unique_section (decl, reloc);
5342 #ifdef COMMON_ASM_OP
5343 /* This says how to output assembler code to declare an
5344 uninitialized external linkage data object.
5346 For medium model x86-64 we need to use .largecomm opcode for
/* Emit a common symbol: use the GAS ".largecomm" directive when the
   medium code model puts objects above -mlarge-data-threshold into far
   sections, otherwise the target's normal COMMON_ASM_OP.
   NOTE(review): lossy listing — the `else` before line 5357 and the
   function's return-type line are elided.  */
5349 x86_elf_aligned_common (FILE *file,
5350 const char *name, unsigned HOST_WIDE_INT size,
5353 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5354 && size > (unsigned int)ix86_section_threshold)
5355 fputs ("\t.largecomm\t", file);
5357 fputs (COMMON_ASM_OP, file);
5358 assemble_name (file, name);
/* ALIGN arrives in bits; the directive wants bytes.  */
5359 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5360 size, align / BITS_PER_UNIT);
5364 /* Utility function for targets to use in implementing
5365 ASM_OUTPUT_ALIGNED_BSS. */
/* Emit an aligned BSS object: place it in .lbss for medium-model objects
   above -mlarge-data-threshold, otherwise in the regular .bss, then emit
   alignment, label and the zero-fill skip.  NOTE(review): lossy listing —
   the `else` before line 5375 and an `#else` in the conditional block
   appear to be elided.  */
5368 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5369 unsigned HOST_WIDE_INT size, int align)
5371 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5372 && size > (unsigned int)ix86_section_threshold)
5373 switch_to_section (get_named_section (decl, ".lbss", 0));
5375 switch_to_section (bss_section);
5376 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5377 #ifdef ASM_DECLARE_OBJECT_NAME
5378 last_assemble_variable_decl = decl;
5379 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5381 /* Standard thing is just output label for the object. */
5382 ASM_OUTPUT_LABEL (file, name);
5383 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Zero-sized objects still get one byte so the label is distinct.  */
5384 ASM_OUTPUT_SKIP (file, size ? size : 1);
5387 /* Decide whether we must probe the stack before any space allocation
5388 on this target. It's essentially TARGET_STACK_PROBE except when
5389 -fstack-check causes the stack to be already probed differently. */
/* NOTE(review): lossy listing — the return-type line, braces, and the
   `return false;` of the guarded branch are elided.  */
5392 ix86_target_stack_probe (void)
5394 /* Do not probe the stack twice if static stack checking is enabled. */
5395 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5398 return TARGET_STACK_PROBE;
5401 /* Decide whether we can make a sibling call to a function. DECL is the
5402 declaration of the function being targeted by the call and EXP is the
5403 CALL_EXPR representing the call. */
/* TARGET_FUNCTION_OK_FOR_SIBCALL hook.  Rejects sibcalls that would
   break PIC/PLT register liveness, stack alignment, x87 return-value
   popping, MS->SYSV ABI clobber differences, or regparm-consumed
   registers needed for an indirect target address.
   NOTE(review): lossy listing — declarations of `a`/`b`, several
   `return false;` lines, braces and parts of conditions (e.g. the
   !TARGET_MACHO / flag_pic guard near line 5417, the `if (decl)` before
   5428) are elided between the numbered lines.  */
5406 ix86_function_ok_for_sibcall (tree decl, tree exp)
5408 tree type, decl_or_type;
5411 /* If we are generating position-independent code, we cannot sibcall
5412 optimize any indirect call, or a direct call to a global function,
5413 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5417 && (!decl || !targetm.binds_local_p (decl)))
5420 /* If we need to align the outgoing stack, then sibcalling would
5421 unalign the stack, which may break the called function. */
5422 if (ix86_minimum_incoming_stack_boundary (true)
5423 < PREFERRED_STACK_BOUNDARY)
/* Direct call: use the decl itself; otherwise fall through to derive
   the function type from the CALL_EXPR below.  */
5428 decl_or_type = decl;
5429 type = TREE_TYPE (decl);
5433 /* We're looking at the CALL_EXPR, we need the type of the function. */
5434 type = CALL_EXPR_FN (exp); /* pointer expression */
5435 type = TREE_TYPE (type); /* pointer type */
5436 type = TREE_TYPE (type); /* function type */
5437 decl_or_type = type;
5440 /* Check that the return value locations are the same. Like
5441 if we are returning floats on the 80387 register stack, we cannot
5442 make a sibcall from a function that doesn't return a float to a
5443 function that does or, conversely, from a function that does return
5444 a float to a function that doesn't; the necessary stack adjustment
5445 would not be executed. This is also the place we notice
5446 differences in the return value ABI. Note that it is ok for one
5447 of the functions to have void return type as long as the return
5448 value of the other is passed in a register. */
5449 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5450 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
/* x87 stack-register returns must match exactly; otherwise a void
   current-function return is also acceptable.  */
5452 if (STACK_REG_P (a) || STACK_REG_P (b))
5454 if (!rtx_equal_p (a, b))
5457 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5459 else if (!rtx_equal_p (a, b))
5464 /* The SYSV ABI has more call-clobbered registers;
5465 disallow sibcalls from MS to SYSV. */
5466 if (cfun->machine->call_abi == MS_ABI
5467 && ix86_function_type_abi (type) == SYSV_ABI)
5472 /* If this call is indirect, we'll need to be able to use a
5473 call-clobbered register for the address of the target function.
5474 Make sure that all such registers are not used for passing
5475 parameters. Note that DLLIMPORT functions are indirect. */
5477 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
/* With regparm >= 3, eax/edx/ecx are all argument registers, leaving
   no call-clobbered register free for the target address.  */
5479 if (ix86_function_regparm (type, NULL) >= 3)
5481 /* ??? Need to count the actual number of registers to be used,
5482 not the possible number of registers. Fix later. */
5488 /* Otherwise okay. That also includes certain types of indirect calls. */
5492 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5493 and "sseregparm" calling convention attributes;
5494 arguments as in struct attribute_spec.handler. */
/* Attribute handler: validates that the calling-convention attribute
   being attached to *NODE is applicable and does not conflict with one
   already present; sets *no_add_attrs on rejection.  NOTE(review): lossy
   listing — remaining parameters (args, flags, no_add_attrs), braces,
   `return NULL_TREE;` lines and the TARGET_64BIT guard near line 5548
   are elided between the numbered lines.  */
5497 ix86_handle_cconv_attribute (tree *node, tree name,
5502 if (TREE_CODE (*node) != FUNCTION_TYPE
5503 && TREE_CODE (*node) != METHOD_TYPE
5504 && TREE_CODE (*node) != FIELD_DECL
5505 && TREE_CODE (*node) != TYPE_DECL)
5507 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5509 *no_add_attrs = true;
5513 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5514 if (is_attribute_p ("regparm", name))
5518 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5520 error ("fastcall and regparm attributes are not compatible");
5523 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
/* NOTE(review): this diagnostic misspells "regparm" as "regparam".
   Left unchanged here because a documentation pass must not alter
   runtime strings; flag for a separate fix.  */
5525 error ("regparam and thiscall attributes are not compatible");
/* Validate the regparm(N) argument: an integer constant, at most
   REGPARM_MAX.  */
5528 cst = TREE_VALUE (args);
5529 if (TREE_CODE (cst) != INTEGER_CST)
5531 warning (OPT_Wattributes,
5532 "%qE attribute requires an integer constant argument",
5534 *no_add_attrs = true;
5536 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5538 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5540 *no_add_attrs = true;
5548 /* Do not warn when emulating the MS ABI. */
5549 if ((TREE_CODE (*node) != FUNCTION_TYPE
5550 && TREE_CODE (*node) != METHOD_TYPE)
5551 || ix86_function_type_abi (*node) != MS_ABI)
5552 warning (OPT_Wattributes, "%qE attribute ignored",
5554 *no_add_attrs = true;
5558 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5559 if (is_attribute_p ("fastcall", name))
5561 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5563 error ("fastcall and cdecl attributes are not compatible");
5565 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5567 error ("fastcall and stdcall attributes are not compatible");
5569 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5571 error ("fastcall and regparm attributes are not compatible");
5573 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5575 error ("fastcall and thiscall attributes are not compatible");
5579 /* Can combine stdcall with fastcall (redundant), regparm and
5581 else if (is_attribute_p ("stdcall", name))
5583 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5585 error ("stdcall and cdecl attributes are not compatible");
5587 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5589 error ("stdcall and fastcall attributes are not compatible");
5591 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5593 error ("stdcall and thiscall attributes are not compatible");
5597 /* Can combine cdecl with regparm and sseregparm. */
5598 else if (is_attribute_p ("cdecl", name))
5600 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5602 error ("stdcall and cdecl attributes are not compatible");
5604 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5606 error ("fastcall and cdecl attributes are not compatible");
5608 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5610 error ("cdecl and thiscall attributes are not compatible");
5613 else if (is_attribute_p ("thiscall", name))
/* NOTE(review): "used for none class-method" reads oddly ("non
   class-method" intended) — runtime string, so left unchanged.  */
5615 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5616 warning (OPT_Wattributes, "%qE attribute is used for none class-method",
5618 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5620 error ("stdcall and thiscall attributes are not compatible");
5622 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5624 error ("fastcall and thiscall attributes are not compatible");
5626 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5628 error ("cdecl and thiscall attributes are not compatible");
5632 /* Can combine sseregparm with all attributes. */
5637 /* The transactional memory builtins are implicitly regparm or fastcall
5638 depending on the ABI. Override the generic do-nothing attribute that
5639 these builtins were declared with, and replace it with one of the two
5640 attributes that we expect elsewhere. */
/* Attribute handler: NODE is the decl being processed, FLAGS are forwarded to
   decl_attributes, and *NO_ADD_ATTRS is set so the placeholder attribute is
   never kept.  NOTE(review): this excerpt elides several original lines
   (return type, locals such as ALT, the 64-bit early-return, braces); the
   comments below describe only the visible code.  */
5643 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5644 int flags, bool *no_add_attrs)
5648 /* In no case do we want to add the placeholder attribute. */
5649 *no_add_attrs = true;
5651 /* The 64-bit ABI is unchanged for transactional memory. */
5655 /* ??? Is there a better way to validate 32-bit windows? We have
5656 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
/* Heuristic for Windows-like 32-bit targets: use fastcall there...  */
5657 if (CHECK_STACK_LIMIT > 0)
5658 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
/* ...otherwise build a regparm(2) attribute in its place.  */
5661 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5662 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
/* Apply the substitute attribute list to the decl.  */
5664 decl_attributes (node, alt, flags);
5669 /* This function determines from TYPE the calling-convention. */
/* Returns a bitmask of IX86_CALLCVT_* flags describing the calling
   convention of function type TYPE.  NOTE(review): excerpt elides some
   lines (return type, locals ATTRS/IS_STDARG, 64-bit early return, braces);
   comments describe visible code only.  */
5672 ix86_get_callcvt (const_tree type)
5674 unsigned int ret = 0;
/* Presumably the 64-bit early exit: everything is cdecl there — the guard
   condition for this return is elided in this excerpt.  */
5679 return IX86_CALLCVT_CDECL;
/* Collect explicit convention attributes from the type.  The base
   conventions are mutually exclusive, hence the else-if chain.  */
5681 attrs = TYPE_ATTRIBUTES (type);
5682 if (attrs != NULL_TREE)
5684 if (lookup_attribute ("cdecl", attrs))
5685 ret |= IX86_CALLCVT_CDECL;
5686 else if (lookup_attribute ("stdcall", attrs))
5687 ret |= IX86_CALLCVT_STDCALL;
5688 else if (lookup_attribute ("fastcall", attrs))
5689 ret |= IX86_CALLCVT_FASTCALL;
5690 else if (lookup_attribute ("thiscall", attrs))
5691 ret |= IX86_CALLCVT_THISCALL;
5693 /* Regparam isn't allowed for thiscall and fastcall. */
5694 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5696 if (lookup_attribute ("regparm", attrs))
5697 ret |= IX86_CALLCVT_REGPARM;
5698 if (lookup_attribute ("sseregparm", attrs))
5699 ret |= IX86_CALLCVT_SSEREGPARM;
/* If an explicit base convention was found, it wins (the elided line here
   presumably returns RET).  */
5702 if (IX86_BASE_CALLCVT(ret) != 0)
/* No explicit convention: -mrtd makes non-stdarg functions stdcall.  */
5706 is_stdarg = stdarg_p (type);
5707 if (TARGET_RTD && !is_stdarg)
5708 return IX86_CALLCVT_STDCALL | ret;
/* Default: cdecl, except MS-ABI non-stdarg methods which are thiscall.
   NOTE(review): the first operand of this OR condition is elided.  */
5712 || TREE_CODE (type) != METHOD_TYPE
5713 || ix86_function_type_abi (type) != MS_ABI)
5714 return IX86_CALLCVT_CDECL | ret;
5716 return IX86_CALLCVT_THISCALL;
5719 /* Return 0 if the attributes for two types are incompatible, 1 if they
5720 are compatible, and 2 if they are nearly compatible (which causes a
5721 warning to be generated). */
/* Implements TARGET_COMP_TYPE_ATTRIBUTES for i386.  NOTE(review): excerpt
   elides the return type, several early returns and the final return.  */
5724 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5726 unsigned int ccvt1, ccvt2;
/* Attribute compatibility only matters for function/method types.  */
5728 if (TREE_CODE (type1) != FUNCTION_TYPE
5729 && TREE_CODE (type1) != METHOD_TYPE)
/* Compare the full calling-convention bitmasks of both types.  */
5732 ccvt1 = ix86_get_callcvt (type1);
5733 ccvt2 = ix86_get_callcvt (type2);
/* Differing regparm counts make the types incompatible (the elided line
   presumably returns 0 here).  */
5736 if (ix86_function_regparm (type1, NULL)
5737 != ix86_function_regparm (type2, NULL))
5743 /* Return the regparm value for a function with the indicated TYPE and DECL.
5744 DECL may be NULL when calling function indirectly
5745 or considering a libcall. */
/* NOTE(review): excerpt elides the return type, locals (CCVT, REGPARM,
   ATTR), the TARGET_64BIT guard before the first return, and several
   braces; comments describe only the visible code.  */
5748 ix86_function_regparm (const_tree type, const_tree decl)
/* 64-bit: the register-parameter count is fixed by the ABI in use.  */
5755 return (ix86_function_type_abi (type) == SYSV_ABI
5756 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5757 ccvt = ix86_get_callcvt (type);
5758 regparm = ix86_regparm;
/* An explicit regparm(N) attribute overrides the -mregparm default.  */
5760 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5762 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5765 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
/* fastcall/thiscall imply fixed register counts (elided returns here).  */
5769 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5771 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5774 /* Use register calling convention for local functions when possible. */
5776 && TREE_CODE (decl) == FUNCTION_DECL
5777 /* Caller and callee must agree on the calling convention, so
5778 checking here just optimize means that with
5779 __attribute__((optimize (...))) caller could use regparm convention
5780 and callee not, or vice versa. Instead look at whether the callee
5781 is optimized or not. */
5782 && opt_for_fn (decl, optimize)
5783 && !(profile_flag && !flag_fentry)
5785 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5786 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl));
/* Only purely local functions whose signature we may alter qualify.  */
5787 if (i && i->local && i->can_change_signature)
5789 int local_regparm, globals = 0, regno;
5791 /* Make sure no regparm register is taken by a
5792 fixed register variable. */
5793 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5794 if (fixed_regs[local_regparm])
5797 /* We don't want to use regparm(3) for nested functions as
5798 these use a static chain pointer in the third argument. */
5799 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5802 /* In 32-bit mode save a register for the split stack. */
5803 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5806 /* Each fixed register usage increases register pressure,
5807 so less registers should be used for argument passing.
5808 This functionality can be overriden by an explicit
5810 for (regno = AX_REG; regno <= DI_REG; regno++)
5811 if (fixed_regs[regno])
/* Reduce the local regparm count by the number of globally fixed
   argument registers counted above.  */
5815 = globals < local_regparm ? local_regparm - globals : 0;
5817 if (local_regparm > regparm)
5818 regparm = local_regparm;
5825 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5826 DFmode (2) arguments in SSE registers for a function with the
5827 indicated TYPE and DECL. DECL may be NULL when calling function
5828 indirectly or considering a libcall. Otherwise return 0. */
/* WARN selects whether ABI-mismatch diagnostics are emitted.
   NOTE(review): excerpt elides the return type, several guards, returns
   and braces; comments describe visible code only.  */
5831 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
/* 32-bit only: the 64-bit ABI handles SSE argument passing itself.  */
5833 gcc_assert (!TARGET_64BIT);
5835 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5836 by the sseregparm attribute. */
5837 if (TARGET_SSEREGPARM
5838 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* sseregparm without SSE hardware support is a hard error (the guard
   checking TARGET_SSE and WARN is elided here).  */
5845 error ("calling %qD with attribute sseregparm without "
5846 "SSE/SSE2 enabled", decl);
5848 error ("calling %qT with attribute sseregparm without "
5849 "SSE/SSE2 enabled", type);
5857 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5858 (and DFmode for SSE2) arguments in SSE registers. */
5859 if (decl && TARGET_SSE_MATH && optimize
5860 && !(profile_flag && !flag_fentry))
5862 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5863 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5864 if (i && i->local && i->can_change_signature)
5865 return TARGET_SSE2 ? 2 : 1;
5871 /* Return true if EAX is live at the start of the function. Used by
5872 ix86_expand_prologue to determine if we need special help before
5873 calling allocate_stack_worker. */
/* NOTE(review): the return type line and braces are elided in this
   excerpt; the function body is the single return below.  */
5876 ix86_eax_live_at_start_p (void)
5878 /* Cheat. Don't bother working forward from ix86_function_regparm
5879 to the function type to whether an actual argument is located in
5880 eax. Instead just look at cfg info, which is still close enough
5881 to correct at this point. This gives false positives for broken
5882 functions that might use uninitialized data that happens to be
5883 allocated in eax, but who cares? */
/* Register 0 is EAX; test its membership in the entry block's live-out
   set from the dataflow framework.  */
5884 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
/* Return true if the hidden aggregate-return pointer should be kept on the
   stack by the caller (i.e. NOT popped by the callee) for function type
   FNTYPE.  NOTE(review): excerpt elides the return type, a local ATTR, the
   TARGET_64BIT guard and some returns.  */
5888 ix86_keep_aggregate_return_pointer (tree fntype)
/* An explicit callee_pop_aggregate_return(N) attribute decides: N == 0
   means the caller keeps (callee does not pop) the pointer.  */
5894 attr = lookup_attribute ("callee_pop_aggregate_return",
5895 TYPE_ATTRIBUTES (fntype));
5897 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5899 /* For 32-bit MS-ABI the default is to keep aggregate
5901 if (ix86_function_type_abi (fntype) == MS_ABI)
/* Otherwise fall back to the target's compile-time default.  */
5904 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5907 /* Value is the number of bytes of arguments automatically
5908 popped when returning from a subroutine call.
5909 FUNDECL is the declaration node of the function (as a tree),
5910 FUNTYPE is the data type of the function (as a tree),
5911 or for a library call it is an identifier node for the subroutine name.
5912 SIZE is the number of bytes of arguments passed on the stack.
5914 On the 80386, the RTD insn may be used to pop them if the number
5915 of args is fixed, but if the number is variable then the caller
5916 must pop them all. RTD can't be used for library calls now
5917 because the library is compiled with the Unix compiler.
5918 Use of RTD is a selectable option, since it is incompatible with
5919 standard Unix calling sequences. If the option is not selected,
5920 the caller must always pop the args.
5922 The attribute stdcall is equivalent to RTD on a per module basis. */
/* NOTE(review): excerpt elides the return type, a local CCVT, the
   "return size" for callee-pop conventions and the final "return 0".  */
5925 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5929 /* None of the 64-bit ABIs pop arguments. */
5933 ccvt = ix86_get_callcvt (funtype);
/* Callee-pop conventions pop the full argument block, but only for
   non-variadic functions (the elided line presumably returns SIZE).  */
5935 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5936 | IX86_CALLCVT_THISCALL)) != 0
5937 && ! stdarg_p (funtype))
5940 /* Lose any fake structure return argument if it is passed on the stack. */
5941 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5942 && !ix86_keep_aggregate_return_pointer (funtype))
/* Only when no register arguments are used is the hidden pointer actually
   on the stack, so only then does the callee pop one word.  */
5944 int nregs = ix86_function_regparm (funtype, fundecl);
5946 return GET_MODE_SIZE (Pmode);
5952 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
/* Returns false when a combined insn would use a hard register in a way no
   alternative of its pattern allows.  NOTE(review): excerpt elides the
   return type, locals (i, j, offset, win), several continues/returns and
   braces; comments describe visible code only.  */
5955 ix86_legitimate_combined_insn (rtx_insn *insn)
5957 /* Check operand constraints in case hard registers were propagated
5958 into insn pattern. This check prevents combine pass from
5959 generating insn patterns with invalid hard register operands.
5960 These invalid insns can eventually confuse reload to error out
5961 with a spill failure. See also PRs 46829 and 46843. */
/* Intentional assignment-in-condition: memoize the recog result into
   INSN_CODE and proceed only if the pattern was recognized.  */
5962 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5966 extract_insn (insn);
5967 preprocess_constraints (insn);
5969 int n_operands = recog_data.n_operands;
5970 int n_alternatives = recog_data.n_alternatives;
/* Validate each operand against the pattern's alternatives.  */
5971 for (i = 0; i < n_operands; i++)
5973 rtx op = recog_data.operand[i];
5974 machine_mode mode = GET_MODE (op);
5975 const operand_alternative *op_alt;
5980 /* For pre-AVX disallow unaligned loads/stores where the
5981 instructions don't support it. */
5983 && VECTOR_MODE_P (GET_MODE (op))
5984 && misaligned_operand (op, GET_MODE (op)))
/* ssememalign gives the minimum alignment (in bits) the insn tolerates;
   the comparison rejecting the insn is elided in this excerpt.  */
5986 int min_align = get_attr_ssememalign (insn);
5991 /* A unary operator may be accepted by the predicate, but it
5992 is irrelevant for matching constraints. */
/* Strip a SUBREG of a hard register, tracking the register offset so that
   reg_fits_class_p below checks the right hard register.  */
5996 if (GET_CODE (op) == SUBREG)
5998 if (REG_P (SUBREG_REG (op))
5999 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6000 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6001 GET_MODE (SUBREG_REG (op)),
6004 op = SUBREG_REG (op);
/* Only hard-register operands need constraint checking here.  */
6007 if (!(REG_P (op) && HARD_REGISTER_P (op)))
6010 op_alt = recog_op_alt;
6012 /* Operand has no constraints, anything is OK. */
6013 win = !n_alternatives;
/* Try each preferred (enabled) alternative until one accepts OP.  */
6015 alternative_mask preferred = get_preferred_alternatives (insn);
6016 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6018 if (!TEST_BIT (preferred, j))
6020 if (op_alt[i].anything_ok
6021 || (op_alt[i].matches != -1
6023 (recog_data.operand[i],
6024 recog_data.operand[op_alt[i].matches]))
6025 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6040 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
/* Returns the constant added to (addr >> 3) to locate the AddressSanitizer
   shadow byte: 1<<44 for LP64 Mach-O, 0x7fff8000 for other LP64 targets,
   and 1<<29 for 32-bit.  (Braces elided in this excerpt.)  */
6042 static unsigned HOST_WIDE_INT
6043 ix86_asan_shadow_offset (void)
6045 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6046 : HOST_WIDE_INT_C (0x7fff8000))
6047 : (HOST_WIDE_INT_1 << 29);
6050 /* Argument support functions. */
6052 /* Return true when register may be used to pass function parameters. */
/* NOTE(review): excerpt elides the return type, the TARGET_64BIT /
   TARGET_SSE branch guards and several returns; comments describe visible
   code only.  */
6054 ix86_function_arg_regno_p (int regno)
6057 const int *parm_regs;
/* 32-bit cases: integer regparm registers, plus SSE/MMX argument
   registers when those register files are enabled.  */
6062 return (regno < REGPARM_MAX
6063 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6065 return (regno < REGPARM_MAX
6066 || (TARGET_MMX && MMX_REGNO_P (regno)
6067 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6068 || (TARGET_SSE && SSE_REGNO_P (regno)
6069 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6072 if (TARGET_SSE && SSE_REGNO_P (regno)
6073 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6076 /* TODO: The function should depend on current function ABI but
6077 builtins.c would need updating then. Therefore we use the
6080 /* RAX is used as hidden argument to va_arg functions. */
6081 if (ix86_abi == SYSV_ABI && regno == AX_REG)
/* 64-bit: check REGNO against the integer parameter registers of the
   default ABI.  */
6084 if (ix86_abi == MS_ABI)
6085 parm_regs = x86_64_ms_abi_int_parameter_registers;
6087 parm_regs = x86_64_int_parameter_registers;
6088 for (i = 0; i < (ix86_abi == MS_ABI
6089 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6090 if (regno == parm_regs[i])
6095 /* Return if we do not know how to pass TYPE solely in registers. */
/* Implements targetm.calls.must_pass_in_stack for i386.  (Return type and
   braces elided in this excerpt.)  */
6098 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
/* Defer to the generic size/padding rule first.  */
6100 if (must_pass_in_stack_var_size_or_pad (mode, type))
6103 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6104 The layout_type routine is crafty and tries to trick us into passing
6105 currently unsupported vector types on the stack by using TImode. */
6106 return (!TARGET_64BIT && mode == TImode
6107 && type && TREE_CODE (type) != VECTOR_TYPE);
6110 /* It returns the size, in bytes, of the area reserved for arguments passed
6111 in registers for the function represented by fndecl dependent to the used
/* NOTE(review): excerpt elides the return type, the value returned for
   64-bit MS-ABI (the register-save "shadow space") and the final return.  */
6114 ix86_reg_parm_stack_space (const_tree fndecl)
6116 enum calling_abi call_abi = SYSV_ABI;
/* FNDECL may be a decl or (for libcalls) a type node; pick the matching
   ABI query.  */
6117 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6118 call_abi = ix86_function_abi (fndecl);
6120 call_abi = ix86_function_type_abi (fndecl);
6121 if (TARGET_64BIT && call_abi == MS_ABI)
6126 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
/* Determines the calling ABI of FNTYPE from ms_abi/sysv_abi attributes,
   defaulting to ix86_abi.  NOTE(review): excerpt elides the return type,
   the abi flips inside each branch and the final return.  */
6129 ix86_function_type_abi (const_tree fntype)
6131 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6133 enum calling_abi abi = ix86_abi;
/* An attribute only matters when it names the opposite of the default.  */
6134 if (abi == SYSV_ABI)
6136 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
/* ms_abi cannot be honored on x32; diagnose once.  */
6140 static bool warned = false;
6143 error ("X32 does not support ms_abi attribute");
6150 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6157 /* We add this as a workaround in order to use libc_has_function
/* Thin forwarder to the target hook; lets i386 code query libc function
   availability through one entry point.  (Return type elided here.)  */
6160 ix86_libc_has_function (enum function_class fn_class)
6162 return targetm.libc_has_function (fn_class);
/* Return true if FN carries the ms_hook_prologue attribute (hot-patchable
   prologue).  Nested functions cannot be hot-patched, so that combination
   is rejected with an error.  NOTE(review): the return type, braces and
   returns are elided in this excerpt.  */
6166 ix86_function_ms_hook_prologue (const_tree fn)
6168 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6170 if (decl_function_context (fn) != NULL_TREE)
6171 error_at (DECL_SOURCE_LOCATION (fn),
6172 "ms_hook_prologue is not compatible with nested function");
/* Returns the calling ABI (SYSV_ABI or MS_ABI) of function decl FNDECL by
   delegating to the type-based query.  NOTE(review): the NULL-decl early
   return and braces are elided in this excerpt.  */
6179 static enum calling_abi
6180 ix86_function_abi (const_tree fndecl)
6184 return ix86_function_type_abi (TREE_TYPE (fndecl));
6187 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
/* Reads the per-function ABI cached on cfun->machine.  (Return type and a
   guard line are elided in this excerpt.)  */
6190 ix86_cfun_abi (void)
6194 return cfun->machine->call_abi;
6197 /* Write the extra assembler code needed to declare a function properly. */
/* Emits 0xCC filler before the label and hot-patch magic bytes after it
   when DECL has the ms_hook_prologue attribute.  NOTE(review): excerpt
   elides the return type, some braces and the TARGET_64BIT guard between
   the two byte sequences.  */
6200 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6203 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
/* Pad with int3 (0xCC) bytes so a hot-patcher has room before the entry:
   32 bytes on 64-bit, 16 on 32-bit, emitted four at a time as longs.  */
6207 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6208 unsigned int filler_cc = 0xcccccccc;
6210 for (i = 0; i < filler_count; i += 4)
6211 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6214 #ifdef SUBTARGET_ASM_UNWIND_INIT
6215 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6218 ASM_OUTPUT_LABEL (asm_out_file, fname);
6220 /* Output magic byte marker, if hot-patch attribute is set. */
6225 /* leaq [%rsp + 0], %rsp */
6226 asm_fprintf (asm_out_file, ASM_BYTE
6227 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6231 /* movl.s %edi, %edi
6233 movl.s %esp, %ebp */
6234 asm_fprintf (asm_out_file, ASM_BYTE
6235 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6241 extern void init_regs (void);
6243 /* Implementation of call abi switching target hook. Specific to FNDECL
6244 the specific call register sets are set. See also
6245 ix86_conditional_register_usage for more details. */
/* Caches the effective ABI for the current function: the global default
   when FNDECL is null, otherwise the decl's type ABI.  (Return type and
   braces elided in this excerpt.)  */
6247 ix86_call_abi_override (const_tree fndecl)
6249 if (fndecl == NULL_TREE)
6250 cfun->machine->call_abi = ix86_abi;
6252 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6255 /* 64-bit MS and SYSV ABI have different set of call used registers. Avoid
6256 expensive re-initialization of init_regs each time we switch function context
6257 since this is needed only during RTL expansion. */
/* NOTE(review): excerpt elides the return type, the TARGET_64BIT guard,
   part of the condition and the reinit_regs() call; the visible test uses
   SI's call-used status as a fingerprint of which ABI regs are in effect.  */
6259 ix86_maybe_switch_abi (void)
6262 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6266 /* Return 1 if pseudo register should be created and used to hold
6267 GOT address for PIC code. */
/* NOTE(review): excerpt elides the return type and surrounding conditions
   (e.g. the !flag_pic early exit and the TARGET_64BIT test implied by the
   code-model checks).  */
6269 ix86_use_pseudo_pic_reg (void)
6272 && (ix86_cmodel == CM_SMALL_PIC
6279 /* Initialize large model PIC register. */
/* Emits the large-code-model PIC register setup sequence: materialize RIP
   at a local label, add the label-to-GOT offset (computed into the scratch
   register TMP_REGNO), leaving the GOT address in pic_offset_table_rtx.  */
6282 ix86_init_large_pic_reg (unsigned int tmp_regno)
6284 rtx_code_label *label;
/* Large-model PIC is 64-bit only.  */
6287 gcc_assert (Pmode == DImode);
6288 label = gen_label_rtx ();
/* Keep the label: the set_rip pattern references it after emission.  */
6290 LABEL_PRESERVE_P (label) = 1;
6291 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
/* The scratch must not alias the PIC register being initialized.  */
6292 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6293 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6295 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6296 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6297 pic_offset_table_rtx, tmp_reg));
6300 /* Create and initialize PIC register if required. */
/* Emits the PIC-register initialization sequence on the entry edge of the
   function.  NOTE(review): excerpt elides the return type, locals (seq,
   entry_edge), start_sequence/get_insns/end_sequence bracketing and the
   TARGET_64BIT branch structure.  */
6302 ix86_init_pic_reg (void)
6307 if (!ix86_use_pseudo_pic_reg ())
/* 64-bit: large model needs the label+offset dance, small model a single
   set_got insn.  */
6314 if (ix86_cmodel == CM_LARGE_PIC)
6315 ix86_init_large_pic_reg (R11_REG);
6317 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6321 /* If there is future mcount call in the function it is more profitable
6322 to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM. */
6323 rtx reg = crtl->profile
6324 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6325 : pic_offset_table_rtx;
6326 rtx insn = emit_insn (gen_set_got (reg));
/* set_got clobbers the flags; mark it frame-related for unwind info.  */
6327 RTX_FRAME_RELATED_P (insn) = 1;
6329 emit_move_insn (pic_offset_table_rtx, reg);
6330 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
/* Splice the collected sequence onto the edge leaving the entry block.  */
6336 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6337 insert_insn_on_edge (seq, entry_edge);
6338 commit_one_edge_insertion (entry_edge);
6341 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6342 for a call to a function whose data type is FNTYPE.
6343 For a library call, FNTYPE is 0. */
/* NOTE(review): excerpt elides several parameters (fndecl, caller), branch
   guards (TARGET_64BIT tests) and braces; comments describe visible code
   only.  */
6346 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6347 tree fntype, /* tree ptr for function decl */
6348 rtx libname, /* SYMBOL_REF of library name or 0 */
6352 struct cgraph_local_info *i;
6354 memset (cum, 0, sizeof (*cum));
/* Resolve the call ABI: from the decl when available, else the type.  */
6358 i = cgraph_node::local_info (fndecl);
6359 cum->call_abi = ix86_function_abi (fndecl);
6364 cum->call_abi = ix86_function_type_abi (fntype);
6367 cum->caller = caller;
6369 /* Set up the number of registers to use for passing arguments. */
6370 cum->nregs = ix86_regparm;
/* 64-bit register counts depend on the resolved ABI.  */
6373 cum->nregs = (cum->call_abi == SYSV_ABI
6374 ? X86_64_REGPARM_MAX
6375 : X86_64_MS_REGPARM_MAX);
6379 cum->sse_nregs = SSE_REGPARM_MAX;
6382 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6383 ? X86_64_SSE_REGPARM_MAX
6384 : X86_64_MS_SSE_REGPARM_MAX);
6388 cum->mmx_nregs = MMX_REGPARM_MAX;
/* Enable psABI-mismatch warnings by default; cleared below for stdarg.  */
6389 cum->warn_avx512f = true;
6390 cum->warn_avx = true;
6391 cum->warn_sse = true;
6392 cum->warn_mmx = true;
6394 /* Because type might mismatch in between caller and callee, we need to
6395 use actual type of function for local calls.
6396 FIXME: cgraph_analyze can be told to actually record if function uses
6397 va_start so for local functions maybe_vaarg can be made aggressive
6399 FIXME: once typesytem is fixed, we won't need this code anymore. */
6400 if (i && i->local && i->can_change_signature)
6401 fntype = TREE_TYPE (fndecl);
6402 cum->stdarg = stdarg_p (fntype);
/* A missing prototype may hide variadic-ness; treat it as maybe-vaarg.  */
6403 cum->maybe_vaarg = (fntype
6404 ? (!prototype_p (fntype) || stdarg_p (fntype))
/* MPX bound-pointer passing state.  */
6407 cum->bnd_regno = FIRST_BND_REG;
6408 cum->bnds_in_bt = 0;
6409 cum->force_bnd_pass = 0;
6413 /* If there are variable arguments, then we won't pass anything
6414 in registers in 32-bit mode. */
6415 if (stdarg_p (fntype))
6420 cum->warn_avx512f = false;
6421 cum->warn_avx = false;
6422 cum->warn_sse = false;
6423 cum->warn_mmx = false;
6427 /* Use ecx and edx registers if function has fastcall attribute,
6428 else look for regparm information. */
6431 unsigned int ccvt = ix86_get_callcvt (fntype);
6432 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6435 cum->fastcall = 1; /* Same first register as in fastcall. */
6437 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6443 cum->nregs = ix86_function_regparm (fntype, fndecl);
6446 /* Set up the number of SSE registers used for passing SFmode
6447 and DFmode arguments. Warn for mismatching ABI. */
6448 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6452 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6453 But in the case of vector types, it is some vector mode.
6455 When we have only some of our vector isa extensions enabled, then there
6456 are some modes for which vector_mode_supported_p is false. For these
6457 modes, the generic vector support in gcc will choose some non-vector mode
6458 in order to implement the type. By computing the natural mode, we'll
6459 select the proper ABI location for the operand and not depend on whatever
6460 the middle-end decides to do with these vector types.
6462 The midde-end can't deal with the vector types > 16 bytes. In this
6463 case, we return the original mode and warn ABI change if CUM isn't
6466 If INT_RETURN is true, warn ABI change if the vector mode isn't
6467 available for function return value. */
/* NOTE(review): excerpt elides the return type, an in_return parameter
   name, many braces and some fallthrough returns; each ISA branch follows
   the same warn-once pattern for argument vs. return contexts.  */
6470 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6473 machine_mode mode = TYPE_MODE (type);
/* Only reconstruct a vector mode when the middle end fell back to a
   non-vector mode for this vector type.  */
6475 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6477 HOST_WIDE_INT size = int_size_in_bytes (type);
6478 if ((size == 8 || size == 16 || size == 32 || size == 64)
6479 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6480 && TYPE_VECTOR_SUBPARTS (type) > 1)
6482 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
/* Scan the machine's vector modes for one with the matching element
   mode and element count.  */
6484 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6485 mode = MIN_MODE_VECTOR_FLOAT;
6487 mode = MIN_MODE_VECTOR_INT;
6489 /* Get the mode which has this inner mode and number of units. */
6490 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6491 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6492 && GET_MODE_INNER (mode) == innermode)
/* 64-byte vectors without AVX-512F: keep TYPE_MODE and warn once per
   context (argument vs. return) about the psABI change.  */
6494 if (size == 64 && !TARGET_AVX512F)
6496 static bool warnedavx512f;
6497 static bool warnedavx512f_ret;
6499 if (cum && cum->warn_avx512f && !warnedavx512f)
6501 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6502 "without AVX512F enabled changes the ABI"))
6503 warnedavx512f = true;
6505 else if (in_return && !warnedavx512f_ret)
6507 if (warning (OPT_Wpsabi, "AVX512F vector return "
6508 "without AVX512F enabled changes the ABI"))
6509 warnedavx512f_ret = true;
6512 return TYPE_MODE (type);
/* Same pattern for 32-byte vectors without AVX...  */
6514 else if (size == 32 && !TARGET_AVX)
6516 static bool warnedavx;
6517 static bool warnedavx_ret;
6519 if (cum && cum->warn_avx && !warnedavx)
6521 if (warning (OPT_Wpsabi, "AVX vector argument "
6522 "without AVX enabled changes the ABI"))
6525 else if (in_return && !warnedavx_ret)
6527 if (warning (OPT_Wpsabi, "AVX vector return "
6528 "without AVX enabled changes the ABI"))
6529 warnedavx_ret = true;
6532 return TYPE_MODE (type);
/* ...for 16-byte (and 64-bit 8-byte) vectors without SSE...  */
6534 else if (((size == 8 && TARGET_64BIT) || size == 16)
6537 static bool warnedsse;
6538 static bool warnedsse_ret;
6540 if (cum && cum->warn_sse && !warnedsse)
6542 if (warning (OPT_Wpsabi, "SSE vector argument "
6543 "without SSE enabled changes the ABI"))
6546 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6548 if (warning (OPT_Wpsabi, "SSE vector return "
6549 "without SSE enabled changes the ABI"))
6550 warnedsse_ret = true;
/* ...and for 32-bit 8-byte vectors without MMX.  */
6553 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6555 static bool warnedmmx;
6556 static bool warnedmmx_ret;
6558 if (cum && cum->warn_mmx && !warnedmmx)
6560 if (warning (OPT_Wpsabi, "MMX vector argument "
6561 "without MMX enabled changes the ABI"))
6564 else if (in_return && !warnedmmx_ret)
6566 if (warning (OPT_Wpsabi, "MMX vector return "
6567 "without MMX enabled changes the ABI"))
6568 warnedmmx_ret = true;
6581 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6582 this may not agree with the mode that the type system has chosen for the
6583 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6584 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
/* NOTE(review): the return type, a local TMP declaration and the final
   "return tmp" are elided in this excerpt.  */
6587 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
6592 if (orig_mode != BLKmode)
6593 tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap the natural-mode register at offset 0 in a one-entry
   PARALLEL so callers can still treat it as the argument location.  */
6596 tmp = gen_rtx_REG (mode, regno);
6597 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6598 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6604 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6605 of this code is to classify each 8bytes of incoming argument by the register
6606 class and assign registers accordingly. */
6608 /* Return the union class of CLASS1 and CLASS2.
6609 See the x86-64 PS ABI for details. */
/* Applies the psABI eightbyte-merging rules in order; the first matching
   rule decides.  NOTE(review): the "return class1/class2" lines for rules
   #1 and #2 are elided in this excerpt.  */
6611 static enum x86_64_reg_class
6612 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6614 /* Rule #1: If both classes are equal, this is the resulting class. */
6615 if (class1 == class2)
6618 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6620 if (class1 == X86_64_NO_CLASS)
6622 if (class2 == X86_64_NO_CLASS)
6625 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6626 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6627 return X86_64_MEMORY_CLASS;
6629 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF stays INTEGERSI: both halves fit in 32 bits.  */
6630 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6631 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6632 return X86_64_INTEGERSI_CLASS;
6633 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6634 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6635 return X86_64_INTEGER_CLASS;
6637 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6639 if (class1 == X86_64_X87_CLASS
6640 || class1 == X86_64_X87UP_CLASS
6641 || class1 == X86_64_COMPLEX_X87_CLASS
6642 || class2 == X86_64_X87_CLASS
6643 || class2 == X86_64_X87UP_CLASS
6644 || class2 == X86_64_COMPLEX_X87_CLASS)
6645 return X86_64_MEMORY_CLASS;
6647 /* Rule #6: Otherwise class SSE is used. */
6648 return X86_64_SSE_CLASS;
6651 /* Classify the argument of type TYPE and mode MODE.
6652 CLASSES will be filled by the register class used to pass each word
6653 of the operand. The number of words is returned. In case the parameter
6654 should be passed in memory, 0 is returned. As a special case for zero
6655 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6657 BIT_OFFSET is used internally for handling records and specifies offset
6658 of the offset in bits modulo 512 to avoid overflow cases.
6660 See the x86-64 PS ABI for details.
6664 classify_argument (machine_mode mode, const_tree type,
6665 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6667 HOST_WIDE_INT bytes =
6668 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6670 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6672 /* Variable sized entities are always passed/returned in memory. */
6676 if (mode != VOIDmode
6677 && targetm.calls.must_pass_in_stack (mode, type))
6680 if (type && AGGREGATE_TYPE_P (type))
6684 enum x86_64_reg_class subclasses[MAX_CLASSES];
6686 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6690 for (i = 0; i < words; i++)
6691 classes[i] = X86_64_NO_CLASS;
6693 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6694 signalize memory class, so handle it as special case. */
6697 classes[0] = X86_64_NO_CLASS;
6701 /* Classify each field of record and merge classes. */
6702 switch (TREE_CODE (type))
6705 /* And now merge the fields of structure. */
6706 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6708 if (TREE_CODE (field) == FIELD_DECL)
6712 if (TREE_TYPE (field) == error_mark_node)
6715 /* Bitfields are always classified as integer. Handle them
6716 early, since later code would consider them to be
6717 misaligned integers. */
6718 if (DECL_BIT_FIELD (field))
6720 for (i = (int_bit_position (field)
6721 + (bit_offset % 64)) / 8 / 8;
6722 i < ((int_bit_position (field) + (bit_offset % 64))
6723 + tree_to_shwi (DECL_SIZE (field))
6726 merge_classes (X86_64_INTEGER_CLASS,
6733 type = TREE_TYPE (field);
6735 /* Flexible array member is ignored. */
6736 if (TYPE_MODE (type) == BLKmode
6737 && TREE_CODE (type) == ARRAY_TYPE
6738 && TYPE_SIZE (type) == NULL_TREE
6739 && TYPE_DOMAIN (type) != NULL_TREE
6740 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6745 if (!warned && warn_psabi)
6748 inform (input_location,
6749 "the ABI of passing struct with"
6750 " a flexible array member has"
6751 " changed in GCC 4.4");
6755 num = classify_argument (TYPE_MODE (type), type,
6757 (int_bit_position (field)
6758 + bit_offset) % 512);
6761 pos = (int_bit_position (field)
6762 + (bit_offset % 64)) / 8 / 8;
6763 for (i = 0; i < num && (i + pos) < words; i++)
6765 merge_classes (subclasses[i], classes[i + pos]);
6772 /* Arrays are handled as small records. */
6775 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6776 TREE_TYPE (type), subclasses, bit_offset);
6780 /* The partial classes are now full classes. */
6781 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6782 subclasses[0] = X86_64_SSE_CLASS;
6783 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6784 && !((bit_offset % 64) == 0 && bytes == 4))
6785 subclasses[0] = X86_64_INTEGER_CLASS;
6787 for (i = 0; i < words; i++)
6788 classes[i] = subclasses[i % num];
6793 case QUAL_UNION_TYPE:
6794 /* Unions are similar to RECORD_TYPE but offset is always 0.
6796 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6798 if (TREE_CODE (field) == FIELD_DECL)
6802 if (TREE_TYPE (field) == error_mark_node)
6805 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6806 TREE_TYPE (field), subclasses,
6810 for (i = 0; i < num && i < words; i++)
6811 classes[i] = merge_classes (subclasses[i], classes[i]);
6822 /* When size > 16 bytes, if the first one isn't
6823 X86_64_SSE_CLASS or any other ones aren't
6824 X86_64_SSEUP_CLASS, everything should be passed in
6826 if (classes[0] != X86_64_SSE_CLASS)
6829 for (i = 1; i < words; i++)
6830 if (classes[i] != X86_64_SSEUP_CLASS)
6834 /* Final merger cleanup. */
6835 for (i = 0; i < words; i++)
6837 /* If one class is MEMORY, everything should be passed in
6839 if (classes[i] == X86_64_MEMORY_CLASS)
6842 /* The X86_64_SSEUP_CLASS should be always preceded by
6843 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6844 if (classes[i] == X86_64_SSEUP_CLASS
6845 && classes[i - 1] != X86_64_SSE_CLASS
6846 && classes[i - 1] != X86_64_SSEUP_CLASS)
6848 /* The first one should never be X86_64_SSEUP_CLASS. */
6849 gcc_assert (i != 0);
6850 classes[i] = X86_64_SSE_CLASS;
6853 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6854 everything should be passed in memory. */
6855 if (classes[i] == X86_64_X87UP_CLASS
6856 && (classes[i - 1] != X86_64_X87_CLASS))
6860 /* The first one should never be X86_64_X87UP_CLASS. */
6861 gcc_assert (i != 0);
6862 if (!warned && warn_psabi)
6865 inform (input_location,
6866 "the ABI of passing union with long double"
6867 " has changed in GCC 4.4");
6875 /* Compute alignment needed. We align all types to natural boundaries with
6876 exception of XFmode that is aligned to 64bits. */
6877 if (mode != VOIDmode && mode != BLKmode)
6879 int mode_alignment = GET_MODE_BITSIZE (mode);
6882 mode_alignment = 128;
6883 else if (mode == XCmode)
6884 mode_alignment = 256;
6885 if (COMPLEX_MODE_P (mode))
6886 mode_alignment /= 2;
6887 /* Misaligned fields are always returned in memory. */
6888 if (bit_offset % mode_alignment)
6892 /* for V1xx modes, just use the base mode */
6893 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6894 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6895 mode = GET_MODE_INNER (mode);
6897 /* Classification of atomic types. */
6902 classes[0] = X86_64_SSE_CLASS;
6905 classes[0] = X86_64_SSE_CLASS;
6906 classes[1] = X86_64_SSEUP_CLASS;
6916 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6918 /* Analyze last 128 bits only. */
6919 size = (size - 1) & 0x7f;
6923 classes[0] = X86_64_INTEGERSI_CLASS;
6928 classes[0] = X86_64_INTEGER_CLASS;
6931 else if (size < 64+32)
6933 classes[0] = X86_64_INTEGER_CLASS;
6934 classes[1] = X86_64_INTEGERSI_CLASS;
6937 else if (size < 64+64)
6939 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6947 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6951 /* OImode shouldn't be used directly. */
6956 if (!(bit_offset % 64))
6957 classes[0] = X86_64_SSESF_CLASS;
6959 classes[0] = X86_64_SSE_CLASS;
6962 classes[0] = X86_64_SSEDF_CLASS;
6965 classes[0] = X86_64_X87_CLASS;
6966 classes[1] = X86_64_X87UP_CLASS;
6969 classes[0] = X86_64_SSE_CLASS;
6970 classes[1] = X86_64_SSEUP_CLASS;
6973 classes[0] = X86_64_SSE_CLASS;
6974 if (!(bit_offset % 64))
6980 if (!warned && warn_psabi)
6983 inform (input_location,
6984 "the ABI of passing structure with complex float"
6985 " member has changed in GCC 4.4");
6987 classes[1] = X86_64_SSESF_CLASS;
6991 classes[0] = X86_64_SSEDF_CLASS;
6992 classes[1] = X86_64_SSEDF_CLASS;
6995 classes[0] = X86_64_COMPLEX_X87_CLASS;
6998 /* This modes is larger than 16 bytes. */
7006 classes[0] = X86_64_SSE_CLASS;
7007 classes[1] = X86_64_SSEUP_CLASS;
7008 classes[2] = X86_64_SSEUP_CLASS;
7009 classes[3] = X86_64_SSEUP_CLASS;
7017 classes[0] = X86_64_SSE_CLASS;
7018 classes[1] = X86_64_SSEUP_CLASS;
7019 classes[2] = X86_64_SSEUP_CLASS;
7020 classes[3] = X86_64_SSEUP_CLASS;
7021 classes[4] = X86_64_SSEUP_CLASS;
7022 classes[5] = X86_64_SSEUP_CLASS;
7023 classes[6] = X86_64_SSEUP_CLASS;
7024 classes[7] = X86_64_SSEUP_CLASS;
7032 classes[0] = X86_64_SSE_CLASS;
7033 classes[1] = X86_64_SSEUP_CLASS;
7041 classes[0] = X86_64_SSE_CLASS;
7047 gcc_assert (VECTOR_MODE_P (mode));
7052 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7054 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7055 classes[0] = X86_64_INTEGERSI_CLASS;
7057 classes[0] = X86_64_INTEGER_CLASS;
7058 classes[1] = X86_64_INTEGER_CLASS;
7059 return 1 + (bytes > 8);
7063 /* Examine the argument and return set number of register required in each
7064 class. Return true iff parameter should be passed in memory. */
/* Classify an argument of MODE/TYPE with classify_argument and tally
   how many integer and SSE registers its register classes consume,
   reporting the counts through *INT_NREGS and *SSE_NREGS.  IN_RETURN
   distinguishes return values from parameters.
   NOTE(review): interior lines of this excerpt are elided — the
   per-class register increments and the final return value are not
   visible here; verify against the full source.  */
7067 examine_argument (machine_mode mode, const_tree type, int in_return,
7068 int *int_nregs, int *sse_nregs)
7070 enum x86_64_reg_class regclass[MAX_CLASSES];
7071 int n = classify_argument (mode, type, regclass, 0);
/* Walk every assigned class and bucket it into integer vs. SSE
   register demand; X87/MEMORY classes force the memory path.  */
7078 for (n--; n >= 0; n--)
7079 switch (regclass[n])
7081 case X86_64_INTEGER_CLASS:
7082 case X86_64_INTEGERSI_CLASS:
7085 case X86_64_SSE_CLASS:
7086 case X86_64_SSESF_CLASS:
7087 case X86_64_SSEDF_CLASS:
7090 case X86_64_NO_CLASS:
7091 case X86_64_SSEUP_CLASS:
7093 case X86_64_X87_CLASS:
7094 case X86_64_X87UP_CLASS:
7095 case X86_64_COMPLEX_X87_CLASS:
7099 case X86_64_MEMORY_CLASS:
7106 /* Construct container for the argument used by GCC interface. See
7107 FUNCTION_ARG for the detailed description. */
/* Build the RTX describing where an argument (or return value, when
   IN_RETURN) of MODE/ORIG_MODE and TYPE lives: a single hard REG for
   the simple one-class cases, or a PARALLEL of EXPR_LISTs mapping each
   eightbyte to an integer register from INTREG or an SSE register
   starting at SSE_REGNO.  NINTREGS/NSSEREGS bound the registers still
   available.  NOTE(review): this excerpt is elided; several branches
   and the brace structure between the visible lines are missing.  */
7110 construct_container (machine_mode mode, machine_mode orig_mode,
7111 const_tree type, int in_return, int nintregs, int nsseregs,
7112 const int *intreg, int sse_regno)
7114 /* The following variables hold the sticky per-compilation error state
   so each diagnostic below is emitted at most once.  */
7115 static bool issued_sse_arg_error;
7116 static bool issued_sse_ret_error;
7117 static bool issued_x87_ret_error;
7119 machine_mode tmpmode;
7121 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7122 enum x86_64_reg_class regclass[MAX_CLASSES];
7126 int needed_sseregs, needed_intregs;
7127 rtx exp[MAX_CLASSES];
7130 n = classify_argument (mode, type, regclass, 0);
7133 if (examine_argument (mode, type, in_return, &needed_intregs,
7136 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7139 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7140 some less clueful developer tries to use floating-point anyway. */
7141 if (needed_sseregs && !TARGET_SSE)
7145 if (!issued_sse_ret_error)
7147 error ("SSE register return with SSE disabled");
7148 issued_sse_ret_error = true;
7151 else if (!issued_sse_arg_error)
7153 error ("SSE register argument with SSE disabled");
7154 issued_sse_arg_error = true;
7159 /* Likewise, error if the ABI requires us to return values in the
7160 x87 registers and the user specified -mno-80387. */
7161 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7162 for (i = 0; i < n; i++)
7163 if (regclass[i] == X86_64_X87_CLASS
7164 || regclass[i] == X86_64_X87UP_CLASS
7165 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7167 if (!issued_x87_ret_error)
7169 error ("x87 register return with x87 disabled");
7170 issued_x87_ret_error = true;
7175 /* First construct simple cases. Avoid SCmode, since we want to use
7176 single register to pass this type. */
7177 if (n == 1 && mode != SCmode)
7178 switch (regclass[0])
7180 case X86_64_INTEGER_CLASS:
7181 case X86_64_INTEGERSI_CLASS:
7182 return gen_rtx_REG (mode, intreg[0]);
7183 case X86_64_SSE_CLASS:
7184 case X86_64_SSESF_CLASS:
7185 case X86_64_SSEDF_CLASS:
7186 if (mode != BLKmode)
7187 return gen_reg_or_parallel (mode, orig_mode,
7188 SSE_REGNO (sse_regno))
7190 case X86_64_X87_CLASS:
7191 case X86_64_COMPLEX_X87_CLASS:
7192 return gen_rtx_REG (mode, FIRST_STACK_REG);
7193 case X86_64_NO_CLASS:
7194 /* Zero sized array, struct or class. */
/* Multi-class fast paths: whole 16/32/64-byte SSE values, XFmode in
   the x87 stack, and adjacent-integer-register pairs.  */
7200 && regclass[0] == X86_64_SSE_CLASS
7201 && regclass[1] == X86_64_SSEUP_CLASS
7203 return gen_reg_or_parallel (mode, orig_mode,
7204 SSE_REGNO (sse_regno))
7206 && regclass[0] == X86_64_SSE_CLASS
7207 && regclass[1] == X86_64_SSEUP_CLASS
7208 && regclass[2] == X86_64_SSEUP_CLASS
7209 && regclass[3] == X86_64_SSEUP_CLASS
7211 return gen_reg_or_parallel (mode, orig_mode,
7212 SSE_REGNO (sse_regno))
7214 && regclass[0] == X86_64_SSE_CLASS
7215 && regclass[1] == X86_64_SSEUP_CLASS
7216 && regclass[2] == X86_64_SSEUP_CLASS
7217 && regclass[3] == X86_64_SSEUP_CLASS
7218 && regclass[4] == X86_64_SSEUP_CLASS
7219 && regclass[5] == X86_64_SSEUP_CLASS
7220 && regclass[6] == X86_64_SSEUP_CLASS
7221 && regclass[7] == X86_64_SSEUP_CLASS
7223 return gen_reg_or_parallel (mode, orig_mode,
7224 SSE_REGNO (sse_regno))
7226 && regclass[0] == X86_64_X87_CLASS
7227 && regclass[1] == X86_64_X87UP_CLASS)
7228 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7231 && regclass[0] == X86_64_INTEGER_CLASS
7232 && regclass[1] == X86_64_INTEGER_CLASS
7233 && (mode == CDImode || mode == TImode)
7234 && intreg[0] + 1 == intreg[1])
7235 return gen_rtx_REG (mode, intreg[0]);
7237 /* Otherwise figure out the entries of the PARALLEL. */
7238 for (i = 0; i < n; i++)
7242 switch (regclass[i])
7244 case X86_64_NO_CLASS:
7246 case X86_64_INTEGER_CLASS:
7247 case X86_64_INTEGERSI_CLASS:
7248 /* Merge TImodes on aligned occasions here too. */
7249 if (i * 8 + 8 > bytes)
7251 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7252 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7256 /* We've requested 24 bytes we
7257 don't have mode for. Use DImode. */
7258 if (tmpmode == BLKmode)
7261 = gen_rtx_EXPR_LIST (VOIDmode,
7262 gen_rtx_REG (tmpmode, *intreg),
7266 case X86_64_SSESF_CLASS:
7268 = gen_rtx_EXPR_LIST (VOIDmode,
7269 gen_rtx_REG (SFmode,
7270 SSE_REGNO (sse_regno)),
7274 case X86_64_SSEDF_CLASS:
7276 = gen_rtx_EXPR_LIST (VOIDmode,
7277 gen_rtx_REG (DFmode,
7278 SSE_REGNO (sse_regno)),
7282 case X86_64_SSE_CLASS:
/* An SSE class followed by SSEUP classes means one wide vector
   register spanning several eightbytes; assert the expected
   SSEUP run before emitting a single wide-mode entry.  */
7290 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7300 && regclass[1] == X86_64_SSEUP_CLASS
7301 && regclass[2] == X86_64_SSEUP_CLASS
7302 && regclass[3] == X86_64_SSEUP_CLASS);
7308 && regclass[1] == X86_64_SSEUP_CLASS
7309 && regclass[2] == X86_64_SSEUP_CLASS
7310 && regclass[3] == X86_64_SSEUP_CLASS
7311 && regclass[4] == X86_64_SSEUP_CLASS
7312 && regclass[5] == X86_64_SSEUP_CLASS
7313 && regclass[6] == X86_64_SSEUP_CLASS
7314 && regclass[7] == X86_64_SSEUP_CLASS);
7322 = gen_rtx_EXPR_LIST (VOIDmode,
7323 gen_rtx_REG (tmpmode,
7324 SSE_REGNO (sse_regno)),
7333 /* Empty aligned struct, union or class. */
7337 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7338 for (i = 0; i < nexps; i++)
7339 XVECEXP (ret, 0, i) = exp [i];
7343 /* Update the data in CUM to advance over an argument of mode MODE
7344 and data type TYPE. (TYPE is null for libcalls where that information
7345 may not be available.)
7347 Return a number of integer registers advanced over. */
/* 32-bit ABI: advance CUM past an argument of MODE/TYPE occupying
   BYTES bytes / WORDS words.  Integer args consume general registers;
   scalar (non-aggregate) SSE and MMX candidates consume one sse/mmx
   register each.  NOTE(review): elided excerpt — the switch structure
   selecting between these paths is not visible here.  */
7350 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7351 const_tree type, HOST_WIDE_INT bytes,
7352 HOST_WIDE_INT words)
/* Integer path: spend WORDS general registers.  */
7370 cum->words += words;
7371 cum->nregs -= words;
7372 cum->regno += words;
7373 if (cum->nregs >= 0)
7375 if (cum->nregs <= 0)
7383 /* OImode shouldn't be used directly. */
7387 if (cum->float_in_sse < 2)
7390 if (cum->float_in_sse < 1)
/* SSE path: only scalar (non-aggregate) values consume an XMM reg.  */
7413 if (!type || !AGGREGATE_TYPE_P (type))
7415 cum->sse_words += words;
7416 cum->sse_nregs -= 1;
7417 cum->sse_regno += 1;
7418 if (cum->sse_nregs <= 0)
/* MMX path: likewise one MMX register for scalar values.  */
7432 if (!type || !AGGREGATE_TYPE_P (type))
7434 cum->mmx_words += words;
7435 cum->mmx_nregs -= 1;
7436 cum->mmx_regno += 1;
7437 if (cum->mmx_nregs <= 0)
/* SysV x86-64 ABI: advance CUM past one argument.  If the argument
   fits in the remaining integer/SSE registers (per examine_argument),
   deduct them; otherwise it goes on the stack, so align and bump
   cum->words instead.  */
7450 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7451 const_tree type, HOST_WIDE_INT words, bool named)
7453 int int_nregs, sse_nregs;
7455 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
7456 if (!named && (VALID_AVX512F_REG_MODE (mode)
7457 || VALID_AVX256_REG_MODE (mode)))
7460 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7461 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7463 cum->nregs -= int_nregs;
7464 cum->sse_nregs -= sse_nregs;
7465 cum->regno += int_nregs;
7466 cum->sse_regno += sse_nregs;
/* Stack case: round cum->words up to the argument's alignment
   (in words) before accounting for its size.  */
7471 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7472 cum->words = (cum->words + align - 1) & ~(align - 1);
7473 cum->words += words;
/* Microsoft x64 ABI: advance CUM past one argument.  Only values of
   size 1/2/4/8 are passed directly; everything else is passed
   indirectly (by reference), hence the assert.  */
7479 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7480 HOST_WIDE_INT words)
7482 /* Otherwise, this should be passed indirect. */
7483 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7485 cum->words += words;
7495 /* Update the data in CUM to advance over an argument of mode MODE and
7496 data type TYPE. (TYPE is null for libcalls where that information
7497 may not be available.) */
/* TARGET_FUNCTION_ARG_ADVANCE hook: update CUM to step over an
   argument of MODE and TYPE (TYPE is null for libcalls), dispatching
   to the MS-64, SysV-64 or 32-bit helper.  Also maintains the MPX
   pointer-bounds bookkeeping (bnds_in_bt / force_bnd_pass /
   bnd_regno) used by the Pointer Bounds Checker.  */
7500 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7501 const_tree type, bool named)
7503 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7504 HOST_WIDE_INT bytes, words;
7507 if (mode == BLKmode)
7508 bytes = int_size_in_bytes (type);
7510 bytes = GET_MODE_SIZE (mode);
7511 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7514 mode = type_natural_mode (type, NULL, false);
/* Pointer-bounds arguments get their own accounting and return early
   (elided) without consuming normal argument registers.  */
7516 if ((type && POINTER_BOUNDS_TYPE_P (type))
7517 || POINTER_BOUNDS_MODE_P (mode))
7519 /* If we pass bounds in BT then just update remained bounds count. */
7520 if (cum->bnds_in_bt)
7526 /* Update remained number of bounds to force. */
7527 if (cum->force_bnd_pass)
7528 cum->force_bnd_pass--;
7535 /* The first arg not going to Bounds Tables resets this counter. */
7536 cum->bnds_in_bt = 0;
7537 /* For unnamed args we always pass bounds to avoid bounds mess when
7538 passed and received types do not match. If bounds do not follow
7539 unnamed arg, still pretend required number of bounds were passed. */
7540 if (cum->force_bnd_pass)
7542 cum->bnd_regno += cum->force_bnd_pass;
7543 cum->force_bnd_pass = 0;
7546 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7547 nregs = function_arg_advance_ms_64 (cum, bytes, words);
7548 else if (TARGET_64BIT)
7549 nregs = function_arg_advance_64 (cum, mode, type, words, named);
7551 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7553 /* For stdarg we expect bounds to be passed for each value passed
7556 cum->force_bnd_pass = nregs;
7557 /* For pointers passed in memory we expect bounds passed in Bounds
7560 cum->bnds_in_bt = chkp_type_bounds_count (type);
7563 /* Define where to put the arguments to a function.
7564 Value is zero to push the argument on the stack,
7565 or a hard register in which to store the argument.
7567 MODE is the argument's machine mode.
7568 TYPE is the data type of the argument (as a tree).
7569 This is null for libcalls where that information may
7571 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7572 the preceding args and about the function being called.
7573 NAMED is nonzero if this argument is a named parameter
7574 (otherwise it is an extra parameter matching an ellipsis). */
/* 32-bit ABI: return the register (or parallel) in which to pass an
   argument of MODE/ORIG_MODE and TYPE, or fall through (elided) to
   NULL for a stack-passed argument.  Handles regparm integer regs,
   fastcall's ECX/EDX preference, and scalar SSE/MMX candidates.  */
7577 function_arg_32 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7578 machine_mode orig_mode, const_tree type,
7579 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7581 /* Avoid the AL settings for the Unix64 ABI. */
7582 if (mode == VOIDmode)
7598 if (words <= cum->nregs)
7600 int regno = cum->regno;
7602 /* Fastcall allocates the first two DWORD (SImode) or
7603 smaller arguments to ECX and EDX if it isn't an
7609 || (type && AGGREGATE_TYPE_P (type)))
7612 /* ECX not EAX is the first allocated register. */
7613 if (regno == AX_REG)
7616 return gen_rtx_REG (mode, regno);
7621 if (cum->float_in_sse < 2)
7624 if (cum->float_in_sse < 1)
7628 /* In 32bit, we pass TImode in xmm registers. */
7635 if (!type || !AGGREGATE_TYPE_P (type))
7638 return gen_reg_or_parallel (mode, orig_mode,
7639 cum->sse_regno + FIRST_SSE_REG)
7645 /* OImode and XImode shouldn't be used directly. */
7660 if (!type || !AGGREGATE_TYPE_P (type))
7663 return gen_reg_or_parallel (mode, orig_mode,
7664 cum->sse_regno + FIRST_SSE_REG)
7670 /* MMX path for 8-byte vectors (scalar, non-aggregate only).  */
7674 if (!type || !AGGREGATE_TYPE_P (type))
7677 return gen_reg_or_parallel (mode, orig_mode,
7678 cum->mmx_regno + FIRST_MMX_REG)
/* SysV x86-64 ABI: return the rtx for passing an argument of
   MODE/ORIG_MODE and TYPE, delegating the real work to
   construct_container.  A VOIDmode "argument" is the hidden AL value
   telling varargs callees how many SSE registers were used.  */
7687 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7688 machine_mode orig_mode, const_tree type, bool named)
7690 /* Handle a hidden AL argument containing number of registers
7691 for varargs x86-64 functions. */
7692 if (mode == VOIDmode)
7693 return GEN_INT (cum->maybe_vaarg
7694 ? (cum->sse_nregs < 0
7695 ? X86_64_SSE_REGPARM_MAX
7716 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
7722 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7724 &x86_64_int_parameter_registers [cum->regno],
/* Microsoft x64 ABI: return the rtx for passing one argument.  Each
   of the first four slots maps to a fixed integer register, or to the
   slot's XMM register for SFmode/DFmode; unnamed floats go in both
   (as a PARALLEL) so varargs callees find them either way.  */
7729 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7730 machine_mode orig_mode, bool named,
7731 HOST_WIDE_INT bytes)
7735 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
7736 We use value of -2 to specify that current function call is MSABI. */
7737 if (mode == VOIDmode)
7738 return GEN_INT (-2);
7740 /* If we've run out of registers, it goes on the stack. */
7741 if (cum->nregs == 0)
7744 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7746 /* Only floating point modes are passed in anything but integer regs. */
7747 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7750 regno = cum->regno + FIRST_SSE_REG;
7755 /* Unnamed floating parameters are passed in both the
7756 SSE and integer registers. */
7757 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7758 t2 = gen_rtx_REG (mode, regno);
7759 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7760 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7761 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7764 /* Handle aggregated types passed in register. */
7765 if (orig_mode == BLKmode)
7767 if (bytes > 0 && bytes <= 8)
7768 mode = (bytes > 4 ? DImode : SImode);
7769 if (mode == BLKmode)
7773 return gen_reg_or_parallel (mode, orig_mode, regno);
7776 /* Return where to put the arguments to a function.
7777 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7779 MODE is the argument's machine mode. TYPE is the data type of the
7780 argument. It is null for libcalls where that information may not be
7781 available. CUM gives information about the preceding args and about
7782 the function being called. NAMED is nonzero if this argument is a
7783 named parameter (otherwise it is an extra parameter matching an
/* TARGET_FUNCTION_ARG hook: return where to put an argument — NULL
   for the stack, or a hard register / PARALLEL.  Pointer-bounds
   (MPX) arguments are resolved first; everything else dispatches to
   the MS-64, SysV-64 or 32-bit helper.  */
7787 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
7788 const_tree type, bool named)
7790 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7791 machine_mode mode = omode;
7792 HOST_WIDE_INT bytes, words;
7795 /* All pointer bounds arguments are handled separately here. */
7796 if ((type && POINTER_BOUNDS_TYPE_P (type))
7797 || POINTER_BOUNDS_MODE_P (mode))
7799 /* Return NULL if bounds are forced to go in Bounds Table. */
7800 if (cum->bnds_in_bt)
7802 /* Return the next available bound reg if any. */
7803 else if (cum->bnd_regno <= LAST_BND_REG)
7804 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
7805 /* Return the next special slot number otherwise. */
7807 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
7812 if (mode == BLKmode)
7813 bytes = int_size_in_bytes (type);
7815 bytes = GET_MODE_SIZE (mode);
7816 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7818 /* To simplify the code below, represent vector types with a vector mode
7819 even if MMX/SSE are not active. */
7820 if (type && TREE_CODE (type) == VECTOR_TYPE)
7821 mode = type_natural_mode (type, cum, false);
7823 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7824 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7825 else if (TARGET_64BIT)
7826 arg = function_arg_64 (cum, mode, omode, type, named);
7828 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7833 /* A C expression that indicates when an argument must be passed by
7834 reference. If nonzero for an argument, a copy of that argument is
7835 made in memory and a pointer to the argument is passed instead of
7836 the argument itself. The pointer is passed in whatever way is
7837 appropriate for passing a pointer to that type. */
/* TARGET_PASS_BY_REFERENCE hook: true when an argument of MODE/TYPE
   must be passed by invisible reference.  Under the MS x64 ABI,
   arrays and any value whose size is not 1/2/4/8 bytes are passed by
   reference; under SysV-64 only variable-size types are.  */
7840 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7841 const_tree type, bool)
7843 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7845 /* See Windows x64 Software Convention. */
7846 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7848 int msize = (int) GET_MODE_SIZE (mode);
7851 /* Arrays are passed by reference. */
7852 if (TREE_CODE (type) == ARRAY_TYPE)
7855 if (AGGREGATE_TYPE_P (type))
7857 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7858 are passed by reference. */
7859 msize = int_size_in_bytes (type);
7863 /* __m128 is passed by reference. */
7865 case 1: case 2: case 4: case 8:
/* SysV 64-bit: variable-sized types (int_size_in_bytes == -1)
   cannot be copied, so they go by reference.  */
7871 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7877 /* Return true when TYPE should be 128bit aligned for 32bit argument
7878 passing ABI. XXX: This function is obsolete and is only used for
7879 checking psABI compatibility with previous versions of GCC. */
/* Return true when TYPE would have been treated as 128-bit aligned by
   the pre-GCC-4.6 32-bit argument-passing code.  Recurses through
   aggregate members.  Kept only for psABI-change warnings.  */
7882 ix86_compat_aligned_value_p (const_tree type)
7884 machine_mode mode = TYPE_MODE (type);
7885 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7889 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7891 if (TYPE_ALIGN (type) < 128)
7894 if (AGGREGATE_TYPE_P (type))
7896 /* Walk the aggregates recursively. */
7897 switch (TREE_CODE (type))
7901 case QUAL_UNION_TYPE:
7905 /* Walk all the structure fields. */
7906 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7908 if (TREE_CODE (field) == FIELD_DECL
7909 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7916 /* Just for use if some languages passes arrays by value. */
7917 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7928 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7929 XXX: This function is obsolete and is only used for checking psABI
7930 compatibility with previous versions of GCC. */
/* Pre-GCC-4.6 argument alignment for MODE/TYPE, starting from ALIGN.
   Used only to detect (and warn about) psABI alignment changes; the
   current rules live in ix86_function_arg_boundary.  */
7933 ix86_compat_function_arg_boundary (machine_mode mode,
7934 const_tree type, unsigned int align)
7936 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7937 natural boundaries. */
7938 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7940 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7941 make an exception for SSE modes since these require 128bit
7944 The handling here differs from field_alignment. ICC aligns MMX
7945 arguments to 4 byte boundaries, while structure fields are aligned
7946 to 8 byte boundaries. */
7949 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7950 align = PARM_BOUNDARY;
7954 if (!ix86_compat_aligned_value_p (type))
7955 align = PARM_BOUNDARY;
/* Never report more than the platform's maximum alignment.  */
7958 if (align > BIGGEST_ALIGNMENT)
7959 align = BIGGEST_ALIGNMENT;
7963 /* Return true when TYPE should be 128bit aligned for 32bit argument
/* Return true when TYPE (or, recursively, any of its members) should
   be 128-bit aligned for 32-bit argument passing.  XFmode/XCmode are
   exempt; otherwise scalars answer via TYPE_ALIGN >= 128.  */
7967 ix86_contains_aligned_value_p (const_tree type)
7969 machine_mode mode = TYPE_MODE (type);
7971 if (mode == XFmode || mode == XCmode)
7974 if (TYPE_ALIGN (type) < 128)
7977 if (AGGREGATE_TYPE_P (type))
7979 /* Walk the aggregates recursively. */
7980 switch (TREE_CODE (type))
7984 case QUAL_UNION_TYPE:
7988 /* Walk all the structure fields. */
7989 for (field = TYPE_FIELDS (type);
7991 field = DECL_CHAIN (field))
7993 if (TREE_CODE (field) == FIELD_DECL
7994 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
8001 /* Just for use if some languages passes arrays by value. */
8002 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
8011 return TYPE_ALIGN (type) >= 128;
8016 /* Gives the alignment boundary, in bits, of an argument with the
8017 specified mode and type. */
/* TARGET_FUNCTION_ARG_BOUNDARY hook: alignment, in bits, for an
   argument of MODE and TYPE.  Uses the main variant's TYPE_ALIGN (or
   the mode alignment for libcalls), clamps up to PARM_BOUNDARY, and
   warns once via inform() when the pre-4.6 rules would have differed
   (-Wpsabi-style compatibility note).  */
8020 ix86_function_arg_boundary (machine_mode mode, const_tree type)
8025 /* Since the main variant type is used for call, we convert it to
8026 the main variant type. */
8027 type = TYPE_MAIN_VARIANT (type);
8028 align = TYPE_ALIGN (type);
8031 align = GET_MODE_ALIGNMENT (mode);
8032 if (align < PARM_BOUNDARY)
8033 align = PARM_BOUNDARY;
8037 unsigned int saved_align = align;
8041 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8044 if (mode == XFmode || mode == XCmode)
8045 align = PARM_BOUNDARY;
8047 else if (!ix86_contains_aligned_value_p (type))
8048 align = PARM_BOUNDARY;
8051 align = PARM_BOUNDARY;
8056 && align != ix86_compat_function_arg_boundary (mode, type,
8060 inform (input_location,
8061 "The ABI for passing parameters with %d-byte"
8062 " alignment has changed in GCC 4.6",
8063 align / BITS_PER_UNIT);
8070 /* Return true if N is a possible register number of function value. */
/* TARGET_FUNCTION_VALUE_REGNO_P hook: true if REGNO can hold a
   function return value.  The visible cases cover integer regs
   (ABI-dependent), bound regs for instrumented functions, x87
   %st(0)/%st(1), and SSE %xmm0/%xmm1.  */
8073 ix86_function_value_regno_p (const unsigned int regno)
8080 return (!TARGET_64BIT || ix86_abi != MS_ABI);
8083 return TARGET_64BIT && ix86_abi != MS_ABI;
8086 return chkp_function_instrumented_p (current_function_decl);
8088 /* Complex values are returned in %st(0)/%st(1) pair. */
8091 /* TODO: The function should depend on current function ABI but
8092 builtins.c would need updating then. Therefore we use the
8094 if (TARGET_64BIT && ix86_abi == MS_ABI)
8096 return TARGET_FLOAT_RETURNS_IN_80387;
8098 /* Complex values are returned in %xmm0/%xmm1 pair. */
8104 if (TARGET_MACHO || TARGET_64BIT)
8112 /* Define how to find the value returned by a function.
8113 VALTYPE is the data type of the value (as a tree).
8114 If the precise function being called is known, FUNC is its FUNCTION_DECL;
8115 otherwise, FUNC is 0. */
/* 32-bit ABI: pick the register returning a value of MODE/ORIG_MODE.
   FNTYPE/FN (either may be null) let sseregparm override the FP
   return register.  Default is %eax; vectors go to %mm0/%xmm0/%ymm0/
   %zmm0 by size; x87 floats to %st(0).  */
8118 function_value_32 (machine_mode orig_mode, machine_mode mode,
8119 const_tree fntype, const_tree fn)
8123 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8124 we normally prevent this case when mmx is not available. However
8125 some ABIs may require the result to be returned like DImode. */
8126 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8127 regno = FIRST_MMX_REG;
8129 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
8130 we prevent this case when sse is not available. However some ABIs
8131 may require the result to be returned like integer TImode. */
8132 else if (mode == TImode
8133 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8134 regno = FIRST_SSE_REG;
8136 /* 32-byte vector modes in %ymm0. */
8137 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8138 regno = FIRST_SSE_REG;
8140 /* 64-byte vector modes in %zmm0. */
8141 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8142 regno = FIRST_SSE_REG;
8144 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
8145 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8146 regno = FIRST_FLOAT_REG;
8148 /* Most things go in %eax. */
8151 /* Override FP return register with %xmm0 for local functions when
8152 SSE math is enabled or for functions with sseregparm attribute. */
8153 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8155 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8156 if ((sse_level >= 1 && mode == SFmode)
8157 || (sse_level == 2 && mode == DFmode))
8158 regno = FIRST_SSE_REG;
8161 /* OImode shouldn't be used directly. */
8162 gcc_assert (mode != OImode);
8164 return gen_rtx_REG (orig_mode, regno);
/* SysV x86-64 ABI: return the rtx describing the return-value
   location.  Libcalls (null valtype) pick a register directly;
   pointers use word_mode; everything else goes through
   construct_container with the return-register tables.  */
8168 function_value_64 (machine_mode orig_mode, machine_mode mode,
8173 /* Handle libcalls, which don't provide a type node. */
8174 if (valtype == NULL)
8188 regno = FIRST_SSE_REG;
8192 regno = FIRST_FLOAT_REG;
8200 return gen_rtx_REG (mode, regno);
8202 else if (POINTER_TYPE_P (valtype))
8204 /* Pointers are always returned in word_mode. */
8208 ret = construct_container (mode, orig_mode, valtype, 1,
8209 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8210 x86_64_int_return_registers, 0);
8212 /* For zero sized structures, construct_container returns NULL, but we
8213 need to keep rest of compiler happy by returning meaningful value. */
8215 ret = gen_rtx_REG (orig_mode, AX_REG);
/* Microsoft x64 ABI: return register for a value of MODE/ORIG_MODE.
   Defaults to %rax; 16-byte non-integral vectors and SFmode/DFmode
   scalars come back in %xmm0.
   NOTE(review): `!VECTOR_INTEGER_TYPE_P (valtype)` below is written
   twice (orig lines 8232/8233) — a duplicated condition; one of the
   two is redundant and likely a copy-paste slip in this GCC vintage.
   Harmless at runtime, but should be deduplicated upstream.  */
8221 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8224 unsigned int regno = AX_REG;
8228 switch (GET_MODE_SIZE (mode))
8231 if (valtype != NULL_TREE
8232 && !VECTOR_INTEGER_TYPE_P (valtype)
8233 && !VECTOR_INTEGER_TYPE_P (valtype)
8234 && !INTEGRAL_TYPE_P (valtype)
8235 && !VECTOR_FLOAT_TYPE_P (valtype))
8237 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8238 && !COMPLEX_MODE_P (mode))
8239 regno = FIRST_SSE_REG;
8243 if (mode == SFmode || mode == DFmode)
8244 regno = FIRST_SSE_REG;
8250 return gen_rtx_REG (orig_mode, regno);
/* Shared worker for ix86_function_value / ix86_libcall_value:
   normalize FNTYPE_OR_DECL into FN (decl) and FNTYPE (type), then
   dispatch by ABI.  Pointer-bounds values are returned in the first
   bound register.  */
8254 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8255 machine_mode orig_mode, machine_mode mode)
8257 const_tree fn, fntype;
8260 if (fntype_or_decl && DECL_P (fntype_or_decl))
8261 fn = fntype_or_decl;
8262 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8264 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8265 || POINTER_BOUNDS_MODE_P (mode))
8266 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8267 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8268 return function_value_ms_64 (orig_mode, mode, valtype);
8269 else if (TARGET_64BIT)
8270 return function_value_64 (orig_mode, mode, valtype);
8272 return function_value_32 (orig_mode, mode, fntype, fn);
/* TARGET_FUNCTION_VALUE hook: compute VALTYPE's natural mode and
   delegate to ix86_function_value_1.  */
8276 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8278 machine_mode mode, orig_mode;
8280 orig_mode = TYPE_MODE (valtype);
8281 mode = type_natural_mode (valtype, NULL, true);
8282 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8285 /* Return an RTX representing a place where a function returns
8286 or receives pointer bounds or NULL if no bounds are returned.
8288 VALTYPE is a data type of a value returned by the function.
8290 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8291 or FUNCTION_TYPE of the function.
8293 If OUTGOING is false, return a place in which the caller will
8294 see the return value. Otherwise, return a place where a
8295 function returns a value. */
/* Return the rtx for the pointer bounds associated with a returned
   VALTYPE, or NULL (elided) when there are none.  A bounded scalar
   uses the first bound register directly; a type containing pointers
   gets a PARALLEL pairing each bound register with the byte offset of
   the pointer it covers (at most two, per the assert).  */
8298 ix86_function_value_bounds (const_tree valtype,
8299 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8300 bool outgoing ATTRIBUTE_UNUSED)
8304 if (BOUNDED_TYPE_P (valtype))
8305 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8306 else if (chkp_type_has_pointer (valtype))
8311 unsigned i, bnd_no = 0;
8313 bitmap_obstack_initialize (NULL);
8314 slots = BITMAP_ALLOC (NULL);
8315 chkp_find_bound_slots (valtype, slots);
8317 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8319 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8320 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8321 gcc_assert (bnd_no < 2);
8322 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8325 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8327 BITMAP_FREE (slots);
8328 bitmap_obstack_release (NULL);
8336 /* Pointer function arguments and return values are promoted to
/* TARGET_PROMOTE_FUNCTION_MODE hook: pointer arguments and return
   values are zero-extended (POINTERS_EXTEND_UNSIGNED); everything
   else follows the default promotion rules.  */
8340 ix86_promote_function_mode (const_tree type, machine_mode mode,
8341 int *punsignedp, const_tree fntype,
8344 if (type != NULL_TREE && POINTER_TYPE_P (type))
8346 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8349 return default_promote_function_mode (type, mode, punsignedp, fntype,
8353 /* Return true if a structure, union or array with MODE containing FIELD
8354 should be accessed using BLKmode. */
/* TARGET_MEMBER_TYPE_FORCES_BLK hook: an XFmode member inside a
   union forces the containing aggregate into BLKmode.  */
8357 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8359 /* Union with XFmode must be in BLKmode. */
8360 return (mode == XFmode
8361 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8362 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
/* TARGET_LIBCALL_VALUE hook: libcalls have no type node, so reuse
   ix86_function_value_1 with null type/decl and MODE for both the
   original and natural mode.  */
8366 ix86_libcall_value (machine_mode mode)
8368 return ix86_function_value_1 (NULL, NULL, mode, mode);
8371 /* Return true iff type is returned in memory. */
/* TARGET_RETURN_IN_MEMORY hook: true when a value of TYPE must be
   returned through a hidden pointer rather than in registers.
   MS-ABI allows only sizes 1/2/4/8 (plus 16-byte vector __m128) in
   registers; SysV-64 asks examine_argument; 32-bit applies the
   MMX/SSE/AVX availability rules below.  */
8374 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8376 #ifdef SUBTARGET_RETURN_IN_MEMORY
8377 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8379 const machine_mode mode = type_natural_mode (type, NULL, true);
8382 if (POINTER_BOUNDS_TYPE_P (type))
8387 if (ix86_function_type_abi (fntype) == MS_ABI)
8389 size = int_size_in_bytes (type);
8391 /* __m128 is returned in xmm0. */
8392 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8393 || INTEGRAL_TYPE_P (type)
8394 || VECTOR_FLOAT_TYPE_P (type))
8395 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8396 && !COMPLEX_MODE_P (mode)
8397 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8400 /* Otherwise, the size must be exactly in [1248]. */
8401 return size != 1 && size != 2 && size != 4 && size != 8;
8405 int needed_intregs, needed_sseregs;
8407 return examine_argument (mode, type, 1,
8408 &needed_intregs, &needed_sseregs);
8413 if (mode == BLKmode)
8416 size = int_size_in_bytes (type);
8418 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8421 if (VECTOR_MODE_P (mode) || mode == TImode)
8423 /* User-created vectors small enough to fit in EAX. */
8427 /* Unless ABI prescribes otherwise,
8428 MMX/3dNow values are returned in MM0 if available. */
8431 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8433 /* SSE values are returned in XMM0 if available. */
8437 /* AVX values are returned in YMM0 if available. */
8441 /* AVX512F values are returned in ZMM0 if available. */
8443 return !TARGET_AVX512F;
8452 /* OImode shouldn't be used directly. */
8453 gcc_assert (mode != OImode);
8461 /* Create the va_list data type. */
8463 /* Returns the calling convention specific va_list date type.
8464 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
/* Build the va_list type for ABI: a plain char* for 32-bit and
   MS-ABI, or the four-field SysV record { gp_offset, fp_offset,
   overflow_arg_area, reg_save_area } wrapped in a one-element array
   so it is passed by reference, matching the psABI.  */
8467 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8469 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8471 /* For i386 we use plain pointer to argument area. */
8472 if (!TARGET_64BIT || abi == MS_ABI)
8473 return build_pointer_type (char_type_node);
8475 record = lang_hooks.types.make_type (RECORD_TYPE);
8476 type_decl = build_decl (BUILTINS_LOCATION,
8477 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8479 f_gpr = build_decl (BUILTINS_LOCATION,
8480 FIELD_DECL, get_identifier ("gp_offset"),
8481 unsigned_type_node);
8482 f_fpr = build_decl (BUILTINS_LOCATION,
8483 FIELD_DECL, get_identifier ("fp_offset"),
8484 unsigned_type_node);
8485 f_ovf = build_decl (BUILTINS_LOCATION,
8486 FIELD_DECL, get_identifier ("overflow_arg_area"),
8488 f_sav = build_decl (BUILTINS_LOCATION,
8489 FIELD_DECL, get_identifier ("reg_save_area"),
/* Remember the counter fields so va_arg expansion can find them.  */
8492 va_list_gpr_counter_field = f_gpr;
8493 va_list_fpr_counter_field = f_fpr;
8495 DECL_FIELD_CONTEXT (f_gpr) = record;
8496 DECL_FIELD_CONTEXT (f_fpr) = record;
8497 DECL_FIELD_CONTEXT (f_ovf) = record;
8498 DECL_FIELD_CONTEXT (f_sav) = record;
8500 TYPE_STUB_DECL (record) = type_decl;
8501 TYPE_NAME (record) = type_decl;
8502 TYPE_FIELDS (record) = f_gpr;
8503 DECL_CHAIN (f_gpr) = f_fpr;
8504 DECL_CHAIN (f_fpr) = f_ovf;
8505 DECL_CHAIN (f_ovf) = f_sav;
8507 layout_type (record);
8509 /* The correct type is an array type of one element. */
8510 return build_array_type (record, build_index_type (size_zero_node));
8513 /* Setup the builtin va_list data type and for 64-bit the additional
8514 calling convention specific va_list data types. */
8517 ix86_build_builtin_va_list (void)
8519 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8521 /* Initialize abi specific va_list builtin types. */
8525 if (ix86_abi == MS_ABI)
8527 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8528 if (TREE_CODE (t) != RECORD_TYPE)
8529 t = build_variant_type_copy (t);
8530 sysv_va_list_type_node = t;
8535 if (TREE_CODE (t) != RECORD_TYPE)
8536 t = build_variant_type_copy (t);
8537 sysv_va_list_type_node = t;
8539 if (ix86_abi != MS_ABI)
8541 t = ix86_build_builtin_va_list_abi (MS_ABI);
8542 if (TREE_CODE (t) != RECORD_TYPE)
8543 t = build_variant_type_copy (t);
8544 ms_va_list_type_node = t;
8549 if (TREE_CODE (t) != RECORD_TYPE)
8550 t = build_variant_type_copy (t);
8551 ms_va_list_type_node = t;
8558 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8561 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8567 /* GPR size of varargs save area. */
8568 if (cfun->va_list_gpr_size)
8569 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8571 ix86_varargs_gpr_size = 0;
8573 /* FPR size of varargs save area. We don't need it if we don't pass
8574 anything in SSE registers. */
8575 if (TARGET_SSE && cfun->va_list_fpr_size)
8576 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8578 ix86_varargs_fpr_size = 0;
8580 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8583 save_area = frame_pointer_rtx;
8584 set = get_varargs_alias_set ();
8586 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8587 if (max > X86_64_REGPARM_MAX)
8588 max = X86_64_REGPARM_MAX;
8590 for (i = cum->regno; i < max; i++)
8592 mem = gen_rtx_MEM (word_mode,
8593 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8594 MEM_NOTRAP_P (mem) = 1;
8595 set_mem_alias_set (mem, set);
8596 emit_move_insn (mem,
8597 gen_rtx_REG (word_mode,
8598 x86_64_int_parameter_registers[i]));
8601 if (ix86_varargs_fpr_size)
8604 rtx_code_label *label;
8607 /* Now emit code to save SSE registers. The AX parameter contains number
8608 of SSE parameter registers used to call this function, though all we
8609 actually check here is the zero/non-zero status. */
8611 label = gen_label_rtx ();
8612 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8613 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8616 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8617 we used movdqa (i.e. TImode) instead? Perhaps even better would
8618 be if we could determine the real mode of the data, via a hook
8619 into pass_stdarg. Ignore all that for now. */
8621 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8622 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8624 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8625 if (max > X86_64_SSE_REGPARM_MAX)
8626 max = X86_64_SSE_REGPARM_MAX;
8628 for (i = cum->sse_regno; i < max; ++i)
8630 mem = plus_constant (Pmode, save_area,
8631 i * 16 + ix86_varargs_gpr_size);
8632 mem = gen_rtx_MEM (smode, mem);
8633 MEM_NOTRAP_P (mem) = 1;
8634 set_mem_alias_set (mem, set);
8635 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8637 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8645 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8647 alias_set_type set = get_varargs_alias_set ();
8650 /* Reset to zero, as there might be a sysv vaarg used
8652 ix86_varargs_gpr_size = 0;
8653 ix86_varargs_fpr_size = 0;
8655 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8659 mem = gen_rtx_MEM (Pmode,
8660 plus_constant (Pmode, virtual_incoming_args_rtx,
8661 i * UNITS_PER_WORD));
8662 MEM_NOTRAP_P (mem) = 1;
8663 set_mem_alias_set (mem, set);
8665 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8666 emit_move_insn (mem, reg);
8671 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8672 tree type, int *, int no_rtl)
8674 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8675 CUMULATIVE_ARGS next_cum;
8678 /* This argument doesn't appear to be used anymore. Which is good,
8679 because the old code here didn't suppress rtl generation. */
8680 gcc_assert (!no_rtl);
8685 fntype = TREE_TYPE (current_function_decl);
8687 /* For varargs, we do not want to skip the dummy va_dcl argument.
8688 For stdargs, we do want to skip the last named argument. */
8690 if (stdarg_p (fntype))
8691 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8694 if (cum->call_abi == MS_ABI)
8695 setup_incoming_varargs_ms_64 (&next_cum);
8697 setup_incoming_varargs_64 (&next_cum);
8701 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8702 enum machine_mode mode,
8704 int *pretend_size ATTRIBUTE_UNUSED,
8707 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8708 CUMULATIVE_ARGS next_cum;
8711 int bnd_reg, i, max;
8713 gcc_assert (!no_rtl);
8715 /* Do nothing if we use plain pointer to argument area. */
8716 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8719 fntype = TREE_TYPE (current_function_decl);
8721 /* For varargs, we do not want to skip the dummy va_dcl argument.
8722 For stdargs, we do want to skip the last named argument. */
8724 if (stdarg_p (fntype))
8725 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8727 save_area = frame_pointer_rtx;
8729 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8730 if (max > X86_64_REGPARM_MAX)
8731 max = X86_64_REGPARM_MAX;
8733 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8734 if (chkp_function_instrumented_p (current_function_decl))
8735 for (i = cum->regno; i < max; i++)
8737 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
8738 rtx reg = gen_rtx_REG (DImode,
8739 x86_64_int_parameter_registers[i]);
8743 if (bnd_reg <= LAST_BND_REG)
8744 bounds = gen_rtx_REG (BNDmode, bnd_reg);
8748 plus_constant (Pmode, arg_pointer_rtx,
8749 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
8750 bounds = gen_reg_rtx (BNDmode);
8751 emit_insn (BNDmode == BND64mode
8752 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
8753 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
8756 emit_insn (BNDmode == BND64mode
8757 ? gen_bnd64_stx (addr, ptr, bounds)
8758 : gen_bnd32_stx (addr, ptr, bounds));
8765 /* Checks if TYPE is of kind va_list char *. */
8768 is_va_list_char_pointer (tree type)
8772 /* For 32-bit it is always true. */
8775 canonic = ix86_canonical_va_list_type (type);
8776 return (canonic == ms_va_list_type_node
8777 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8780 /* Implement va_start. */
8783 ix86_va_start (tree valist, rtx nextarg)
8785 HOST_WIDE_INT words, n_gpr, n_fpr;
8786 tree f_gpr, f_fpr, f_ovf, f_sav;
8787 tree gpr, fpr, ovf, sav, t;
8791 if (flag_split_stack
8792 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8794 unsigned int scratch_regno;
8796 /* When we are splitting the stack, we can't refer to the stack
8797 arguments using internal_arg_pointer, because they may be on
8798 the old stack. The split stack prologue will arrange to
8799 leave a pointer to the old stack arguments in a scratch
8800 register, which we here copy to a pseudo-register. The split
8801 stack prologue can't set the pseudo-register directly because
8802 it (the prologue) runs before any registers have been saved. */
8804 scratch_regno = split_stack_prologue_scratch_regno ();
8805 if (scratch_regno != INVALID_REGNUM)
8810 reg = gen_reg_rtx (Pmode);
8811 cfun->machine->split_stack_varargs_pointer = reg;
8814 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8818 push_topmost_sequence ();
8819 emit_insn_after (seq, entry_of_function ());
8820 pop_topmost_sequence ();
8824 /* Only 64bit target needs something special. */
8825 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8827 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8828 std_expand_builtin_va_start (valist, nextarg);
8833 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8834 next = expand_binop (ptr_mode, add_optab,
8835 cfun->machine->split_stack_varargs_pointer,
8836 crtl->args.arg_offset_rtx,
8837 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8838 convert_move (va_r, next, 0);
8840 /* Store zero bounds for va_list. */
8841 if (chkp_function_instrumented_p (current_function_decl))
8842 chkp_expand_bounds_reset_for_mem (valist,
8843 make_tree (TREE_TYPE (valist),
8850 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8851 f_fpr = DECL_CHAIN (f_gpr);
8852 f_ovf = DECL_CHAIN (f_fpr);
8853 f_sav = DECL_CHAIN (f_ovf);
8855 valist = build_simple_mem_ref (valist);
8856 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8857 /* The following should be folded into the MEM_REF offset. */
8858 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8860 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8862 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8864 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8867 /* Count number of gp and fp argument registers used. */
8868 words = crtl->args.info.words;
8869 n_gpr = crtl->args.info.regno;
8870 n_fpr = crtl->args.info.sse_regno;
8872 if (cfun->va_list_gpr_size)
8874 type = TREE_TYPE (gpr);
8875 t = build2 (MODIFY_EXPR, type,
8876 gpr, build_int_cst (type, n_gpr * 8));
8877 TREE_SIDE_EFFECTS (t) = 1;
8878 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8881 if (TARGET_SSE && cfun->va_list_fpr_size)
8883 type = TREE_TYPE (fpr);
8884 t = build2 (MODIFY_EXPR, type, fpr,
8885 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8886 TREE_SIDE_EFFECTS (t) = 1;
8887 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8890 /* Find the overflow area. */
8891 type = TREE_TYPE (ovf);
8892 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8893 ovf_rtx = crtl->args.internal_arg_pointer;
8895 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8896 t = make_tree (type, ovf_rtx);
8898 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8900 /* Store zero bounds for overflow area pointer. */
8901 if (chkp_function_instrumented_p (current_function_decl))
8902 chkp_expand_bounds_reset_for_mem (ovf, t);
8904 t = build2 (MODIFY_EXPR, type, ovf, t);
8905 TREE_SIDE_EFFECTS (t) = 1;
8906 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8908 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8910 /* Find the register save area.
8911 The prologue of the function saves it right above the stack frame. */
8912 type = TREE_TYPE (sav);
8913 t = make_tree (type, frame_pointer_rtx);
8914 if (!ix86_varargs_gpr_size)
8915 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8917 /* Store zero bounds for save area pointer. */
8918 if (chkp_function_instrumented_p (current_function_decl))
8919 chkp_expand_bounds_reset_for_mem (sav, t);
8921 t = build2 (MODIFY_EXPR, type, sav, t);
8922 TREE_SIDE_EFFECTS (t) = 1;
8923 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8927 /* Implement va_arg. */
8930 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8933 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8934 tree f_gpr, f_fpr, f_ovf, f_sav;
8935 tree gpr, fpr, ovf, sav, t;
8937 tree lab_false, lab_over = NULL_TREE;
8942 machine_mode nat_mode;
8943 unsigned int arg_boundary;
8945 /* Only 64bit target needs something special. */
8946 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8947 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8949 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8950 f_fpr = DECL_CHAIN (f_gpr);
8951 f_ovf = DECL_CHAIN (f_fpr);
8952 f_sav = DECL_CHAIN (f_ovf);
8954 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8955 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8956 valist = build_va_arg_indirect_ref (valist);
8957 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8958 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8959 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8961 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8963 type = build_pointer_type (type);
8964 size = int_size_in_bytes (type);
8965 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8967 nat_mode = type_natural_mode (type, NULL, false);
8982 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
8983 if (!TARGET_64BIT_MS_ABI)
8990 container = construct_container (nat_mode, TYPE_MODE (type),
8991 type, 0, X86_64_REGPARM_MAX,
8992 X86_64_SSE_REGPARM_MAX, intreg,
8997 /* Pull the value out of the saved registers. */
8999 addr = create_tmp_var (ptr_type_node, "addr");
9003 int needed_intregs, needed_sseregs;
9005 tree int_addr, sse_addr;
9007 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9008 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9010 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
9012 need_temp = (!REG_P (container)
9013 && ((needed_intregs && TYPE_ALIGN (type) > 64)
9014 || TYPE_ALIGN (type) > 128));
9016 /* In case we are passing structure, verify that it is consecutive block
9017 on the register save area. If not we need to do moves. */
9018 if (!need_temp && !REG_P (container))
9020 /* Verify that all registers are strictly consecutive */
9021 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
9025 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9027 rtx slot = XVECEXP (container, 0, i);
9028 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9029 || INTVAL (XEXP (slot, 1)) != i * 16)
9037 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9039 rtx slot = XVECEXP (container, 0, i);
9040 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9041 || INTVAL (XEXP (slot, 1)) != i * 8)
9053 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9054 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9057 /* First ensure that we fit completely in registers. */
9060 t = build_int_cst (TREE_TYPE (gpr),
9061 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9062 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9063 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9064 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9065 gimplify_and_add (t, pre_p);
9069 t = build_int_cst (TREE_TYPE (fpr),
9070 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9071 + X86_64_REGPARM_MAX * 8);
9072 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9073 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9074 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9075 gimplify_and_add (t, pre_p);
9078 /* Compute index to start of area used for integer regs. */
9081 /* int_addr = gpr + sav; */
9082 t = fold_build_pointer_plus (sav, gpr);
9083 gimplify_assign (int_addr, t, pre_p);
9087 /* sse_addr = fpr + sav; */
9088 t = fold_build_pointer_plus (sav, fpr);
9089 gimplify_assign (sse_addr, t, pre_p);
9093 int i, prev_size = 0;
9094 tree temp = create_tmp_var (type, "va_arg_tmp");
9097 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9098 gimplify_assign (addr, t, pre_p);
9100 for (i = 0; i < XVECLEN (container, 0); i++)
9102 rtx slot = XVECEXP (container, 0, i);
9103 rtx reg = XEXP (slot, 0);
9104 machine_mode mode = GET_MODE (reg);
9110 tree dest_addr, dest;
9111 int cur_size = GET_MODE_SIZE (mode);
9113 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9114 prev_size = INTVAL (XEXP (slot, 1));
9115 if (prev_size + cur_size > size)
9117 cur_size = size - prev_size;
9118 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9119 if (mode == BLKmode)
9122 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9123 if (mode == GET_MODE (reg))
9124 addr_type = build_pointer_type (piece_type);
9126 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9128 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9131 if (SSE_REGNO_P (REGNO (reg)))
9133 src_addr = sse_addr;
9134 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9138 src_addr = int_addr;
9139 src_offset = REGNO (reg) * 8;
9141 src_addr = fold_convert (addr_type, src_addr);
9142 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9144 dest_addr = fold_convert (daddr_type, addr);
9145 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9146 if (cur_size == GET_MODE_SIZE (mode))
9148 src = build_va_arg_indirect_ref (src_addr);
9149 dest = build_va_arg_indirect_ref (dest_addr);
9151 gimplify_assign (dest, src, pre_p);
9156 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9157 3, dest_addr, src_addr,
9158 size_int (cur_size));
9159 gimplify_and_add (copy, pre_p);
9161 prev_size += cur_size;
9167 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9168 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9169 gimplify_assign (gpr, t, pre_p);
9174 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9175 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9176 gimplify_assign (fpr, t, pre_p);
9179 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9181 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9184 /* ... otherwise out of the overflow area. */
9186 /* When we align parameter on stack for caller, if the parameter
9187 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
9188 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
9189 here with caller. */
9190 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9191 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9192 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9194 /* Care for on-stack alignment if needed. */
9195 if (arg_boundary <= 64 || size == 0)
9199 HOST_WIDE_INT align = arg_boundary / 8;
9200 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9201 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9202 build_int_cst (TREE_TYPE (t), -align));
9205 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9206 gimplify_assign (addr, t, pre_p);
9208 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9209 gimplify_assign (unshare_expr (ovf), t, pre_p);
9212 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9214 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9215 addr = fold_convert (ptrtype, addr);
9218 addr = build_va_arg_indirect_ref (addr);
9219 return build_va_arg_indirect_ref (addr);
9222 /* Return true if OPNUM's MEM should be matched
9223 in movabs* patterns. */
9226 ix86_check_movabs (rtx insn, int opnum)
9230 set = PATTERN (insn);
9231 if (GET_CODE (set) == PARALLEL)
9232 set = XVECEXP (set, 0, 0);
9233 gcc_assert (GET_CODE (set) == SET);
9234 mem = XEXP (set, opnum);
9235 while (GET_CODE (mem) == SUBREG)
9236 mem = SUBREG_REG (mem);
9237 gcc_assert (MEM_P (mem));
9238 return volatile_ok || !MEM_VOLATILE_P (mem);
9241 /* Initialize the table of extra 80387 mathematical constants. */
9244 init_ext_80387_constants (void)
9246 static const char * cst[5] =
9248 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9249 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9250 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9251 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9252 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9256 for (i = 0; i < 5; i++)
9258 real_from_string (&ext_80387_constants_table[i], cst[i]);
9259 /* Ensure each constant is rounded to XFmode precision. */
9260 real_convert (&ext_80387_constants_table[i],
9261 XFmode, &ext_80387_constants_table[i]);
9264 ext_80387_constants_init = 1;
9267 /* Return non-zero if the constant is something that
9268 can be loaded with a special instruction. */
9271 standard_80387_constant_p (rtx x)
9273 machine_mode mode = GET_MODE (x);
9277 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
9280 if (x == CONST0_RTX (mode))
9282 if (x == CONST1_RTX (mode))
9285 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9287 /* For XFmode constants, try to find a special 80387 instruction when
9288 optimizing for size or on those CPUs that benefit from them. */
9290 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
9294 if (! ext_80387_constants_init)
9295 init_ext_80387_constants ();
9297 for (i = 0; i < 5; i++)
9298 if (real_identical (&r, &ext_80387_constants_table[i]))
9302 /* Load of the constant -0.0 or -1.0 will be split as
9303 fldz;fchs or fld1;fchs sequence. */
9304 if (real_isnegzero (&r))
9306 if (real_identical (&r, &dconstm1))
9312 /* Return the opcode of the special instruction to be used to load
9316 standard_80387_constant_opcode (rtx x)
9318 switch (standard_80387_constant_p (x))
9342 /* Return the CONST_DOUBLE representing the 80387 constant that is
9343 loaded by the specified special instruction. The argument IDX
9344 matches the return value from standard_80387_constant_p. */
9347 standard_80387_constant_rtx (int idx)
9351 if (! ext_80387_constants_init)
9352 init_ext_80387_constants ();
9368 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9372 /* Return 1 if X is all 0s and 2 if x is all 1s
9373 in supported SSE/AVX vector mode. */
9376 standard_sse_constant_p (rtx x)
9378 machine_mode mode = GET_MODE (x);
9380 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
9382 if (vector_all_ones_operand (x, mode))
9410 /* Return the opcode of the special instruction to be used to load
9414 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9416 switch (standard_sse_constant_p (x))
9419 switch (get_attr_mode (insn))
9422 return "vpxord\t%g0, %g0, %g0";
9424 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9425 : "vpxord\t%g0, %g0, %g0";
9427 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9428 : "vpxorq\t%g0, %g0, %g0";
9430 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9431 : "%vpxor\t%0, %d0";
9433 return "%vxorpd\t%0, %d0";
9435 return "%vxorps\t%0, %d0";
9438 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9439 : "vpxor\t%x0, %x0, %x0";
9441 return "vxorpd\t%x0, %x0, %x0";
9443 return "vxorps\t%x0, %x0, %x0";
9451 || get_attr_mode (insn) == MODE_XI
9452 || get_attr_mode (insn) == MODE_V8DF
9453 || get_attr_mode (insn) == MODE_V16SF)
9454 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9456 return "vpcmpeqd\t%0, %0, %0";
9458 return "pcmpeqd\t%0, %0";
9466 /* Returns true if OP contains a symbol reference */
9469 symbolic_reference_mentioned_p (rtx op)
9474 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9477 fmt = GET_RTX_FORMAT (GET_CODE (op));
9478 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9484 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9485 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9489 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9496 /* Return true if it is appropriate to emit `ret' instructions in the
9497 body of a function. Do this only if the epilogue is simple, needing a
9498 couple of insns. Prior to reloading, we can't tell how many registers
9499 must be saved, so return false then. Return false if there is no frame
9500 marker to de-allocate. */
9503 ix86_can_use_return_insn_p (void)
9505 struct ix86_frame frame;
9507 if (! reload_completed || frame_pointer_needed)
9510 /* Don't allow more than 32k pop, since that's all we can do
9511 with one instruction. */
9512 if (crtl->args.pops_args && crtl->args.size >= 32768)
9515 ix86_compute_frame_layout (&frame);
9516 return (frame.stack_pointer_offset == UNITS_PER_WORD
9517 && (frame.nregs + frame.nsseregs) == 0);
9520 /* Value should be nonzero if functions must have frame pointers.
9521 Zero means the frame pointer need not be set up (and parms may
9522 be accessed via the stack pointer) in functions that seem suitable. */
9525 ix86_frame_pointer_required (void)
9527 /* If we accessed previous frames, then the generated code expects
9528 to be able to access the saved ebp value in our frame. */
9529 if (cfun->machine->accesses_prev_frame)
9532 /* Several x86 os'es need a frame pointer for other reasons,
9533 usually pertaining to setjmp. */
9534 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9537 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
9538 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9541 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
9542 allocation is 4GB. */
9543 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9546 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9547 turns off the frame pointer by default. Turn it back on now if
9548 we've not got a leaf function. */
9549 if (TARGET_OMIT_LEAF_FRAME_POINTER
9551 || ix86_current_function_calls_tls_descriptor))
9554 if (crtl->profile && !flag_fentry)
9560 /* Record that the current function accesses previous call frames. */
9563 ix86_setup_frame_addresses (void)
9565 cfun->machine->accesses_prev_frame = 1;
9568 #ifndef USE_HIDDEN_LINKONCE
9569 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9570 # define USE_HIDDEN_LINKONCE 1
9572 # define USE_HIDDEN_LINKONCE 0
9576 static int pic_labels_used;
9578 /* Fills in the label name that should be used for a pc thunk for
9579 the given register. */
9582 get_pc_thunk_name (char name[32], unsigned int regno)
9584 gcc_assert (!TARGET_64BIT);
9586 if (USE_HIDDEN_LINKONCE)
9587 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9589 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9593 /* This function generates code for -fpic that loads %ebx with
9594 the return address of the caller and then returns. */
9597 ix86_code_end (void)
9602 for (regno = AX_REG; regno <= SP_REG; regno++)
9607 if (!(pic_labels_used & (1 << regno)))
9610 get_pc_thunk_name (name, regno);
9612 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9613 get_identifier (name),
9614 build_function_type_list (void_type_node, NULL_TREE));
9615 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9616 NULL_TREE, void_type_node);
9617 TREE_PUBLIC (decl) = 1;
9618 TREE_STATIC (decl) = 1;
9619 DECL_IGNORED_P (decl) = 1;
9624 switch_to_section (darwin_sections[text_coal_section]);
9625 fputs ("\t.weak_definition\t", asm_out_file);
9626 assemble_name (asm_out_file, name);
9627 fputs ("\n\t.private_extern\t", asm_out_file);
9628 assemble_name (asm_out_file, name);
9629 putc ('\n', asm_out_file);
9630 ASM_OUTPUT_LABEL (asm_out_file, name);
9631 DECL_WEAK (decl) = 1;
9635 if (USE_HIDDEN_LINKONCE)
9637 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9639 targetm.asm_out.unique_section (decl, 0);
9640 switch_to_section (get_named_section (decl, NULL, 0));
9642 targetm.asm_out.globalize_label (asm_out_file, name);
9643 fputs ("\t.hidden\t", asm_out_file);
9644 assemble_name (asm_out_file, name);
9645 putc ('\n', asm_out_file);
9646 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9650 switch_to_section (text_section);
9651 ASM_OUTPUT_LABEL (asm_out_file, name);
9654 DECL_INITIAL (decl) = make_node (BLOCK);
9655 current_function_decl = decl;
9656 init_function_start (decl);
9657 first_function_block_is_cold = false;
9658 /* Make sure unwind info is emitted for the thunk if needed. */
9659 final_start_function (emit_barrier (), asm_out_file, 1);
9661 /* Pad stack IP move with 4 instructions (two NOPs count
9662 as one instruction). */
9663 if (TARGET_PAD_SHORT_FUNCTION)
9668 fputs ("\tnop\n", asm_out_file);
9671 xops[0] = gen_rtx_REG (Pmode, regno);
9672 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9673 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9674 output_asm_insn ("%!ret", NULL);
9675 final_end_function ();
9676 init_insn_lengths ();
9677 free_after_compilation (cfun);
9679 current_function_decl = NULL;
9682 if (flag_split_stack)
9683 file_end_indicate_split_stack ();
9686 /* Emit code for the SET_GOT patterns. */
9689 output_set_got (rtx dest, rtx label)
9695 if (TARGET_VXWORKS_RTP && flag_pic)
9697 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9698 xops[2] = gen_rtx_MEM (Pmode,
9699 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9700 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9702 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9703 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9704 an unadorned address. */
9705 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9706 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9707 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9711 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9716 /* We don't need a pic base, we're not producing pic. */
9719 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9720 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9721 targetm.asm_out.internal_label (asm_out_file, "L",
9722 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9727 get_pc_thunk_name (name, REGNO (dest));
9728 pic_labels_used |= 1 << REGNO (dest);
9730 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9731 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9732 output_asm_insn ("%!call\t%X2", xops);
9735 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9736 This is what will be referenced by the Mach-O PIC subsystem. */
9737 if (machopic_should_output_picbase_label () || !label)
9738 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9740 /* When we are restoring the pic base at the site of a nonlocal label,
9741 and we decided to emit the pic base above, we will still output a
9742 local label used for calculating the correction offset (even though
9743 the offset will be 0 in that case). */
9745 targetm.asm_out.internal_label (asm_out_file, "L",
9746 CODE_LABEL_NUMBER (label));
9751 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9756 /* Generate an "push" pattern for input ARG. */
9761 struct machine_function *m = cfun->machine;
9763 if (m->fs.cfa_reg == stack_pointer_rtx)
9764 m->fs.cfa_offset += UNITS_PER_WORD;
9765 m->fs.sp_offset += UNITS_PER_WORD;
9767 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9768 arg = gen_rtx_REG (word_mode, REGNO (arg));
9770 return gen_rtx_SET (VOIDmode,
9771 gen_rtx_MEM (word_mode,
9772 gen_rtx_PRE_DEC (Pmode,
9773 stack_pointer_rtx)),
9777 /* Generate an "pop" pattern for input ARG. */
9782 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9783 arg = gen_rtx_REG (word_mode, REGNO (arg));
9785 return gen_rtx_SET (VOIDmode,
9787 gen_rtx_MEM (word_mode,
9788 gen_rtx_POST_INC (Pmode,
9789 stack_pointer_rtx)));
9792 /* Return >= 0 if there is an unused call-clobbered register available
9793 for the entire function.  Returns INVALID_REGNUM when none is.  */
9796 ix86_select_alt_pic_regnum (void)
/* With a pseudo PIC register there is no fixed hard reg to substitute.  */
9798 if (ix86_use_pseudo_pic_reg ())
9799 return INVALID_REGNUM;
9803 && !ix86_current_function_calls_tls_descriptor)
9806 /* Can't use the same register for both PIC and DRAP. */
9808 drap = REGNO (crtl->drap_reg);
/* Scan ax/cx/dx (regnos 2..0) for one never live in this function.  */
9811 for (i = 2; i >= 0; --i)
9812 if (i != drap && !df_regs_ever_live_p (i))
9816 return INVALID_REGNUM;
9819 /* Return TRUE if we need to save REGNO in the current function's
   prologue.  MAYBE_EH_RETURN additionally considers the EH return
   data registers.  */
9822 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9824 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
9825 && pic_offset_table_rtx)
9827 if (ix86_use_pseudo_pic_reg ())
9829 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
9830 _mcount in prologue. */
9831 if (!TARGET_64BIT && flag_pic && crtl->profile)
9834 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9836 || crtl->calls_eh_return
9837 || crtl->uses_const_pool
9838 || cfun->has_nonlocal_label)
/* Only save the hard PIC reg when no alternate register could hold it.  */
9839 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
/* EH return data registers must be preserved for the landing pad.  */
9842 if (crtl->calls_eh_return && maybe_eh_return)
9847 unsigned test = EH_RETURN_DATA_REGNO (i);
9848 if (test == INVALID_REGNUM)
9856 && regno == REGNO (crtl->drap_reg)
9857 && !cfun->machine->no_drap_save_restore)
/* Default rule: save call-saved, non-fixed registers that are live,
   except the frame pointer when it is set up by the prologue itself.  */
9860 return (df_regs_ever_live_p (regno)
9861 && !call_used_regs[regno]
9862 && !fixed_regs[regno]
9863 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9866 /* Return number of saved general purpose registers. */
9869 ix86_nsaved_regs (void)
9874 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9875 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9880 /* Return number of saved SSE registers. */
9883 ix86_nsaved_sseregs (void)
/* Only the 64-bit MS ABI saves SSE registers in the prologue.  */
9888 if (!TARGET_64BIT_MS_ABI)
9890 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9891 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9896 /* Given FROM and TO register numbers, say whether this elimination is
9897 allowed. If stack alignment is needed, we can only replace argument
9898 pointer with hard frame pointer, or replace frame pointer with stack
9899 pointer. Otherwise, frame pointer elimination is automatically
9900 handled and all other eliminations are valid. */
9903 ix86_can_eliminate (const int from, const int to)
9905 if (stack_realign_fp)
9906 return ((from == ARG_POINTER_REGNUM
9907 && to == HARD_FRAME_POINTER_REGNUM)
9908 || (from == FRAME_POINTER_REGNUM
9909 && to == STACK_POINTER_REGNUM));
/* Eliminating to SP is only possible when no frame pointer is needed.  */
9911 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9914 /* Return the offset between two registers, one to be eliminated, and the other
9915 its replacement, at the start of a routine.  Computed from the frame
   layout; all offsets are relative to the CFA.  */
9918 ix86_initial_elimination_offset (int from, int to)
9920 struct ix86_frame frame;
9921 ix86_compute_frame_layout (&frame);
9923 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9924 return frame.hard_frame_pointer_offset;
9925 else if (from == FRAME_POINTER_REGNUM
9926 && to == HARD_FRAME_POINTER_REGNUM)
9927 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* The only remaining valid target is the stack pointer.  */
9930 gcc_assert (to == STACK_POINTER_REGNUM);
9932 if (from == ARG_POINTER_REGNUM)
9933 return frame.stack_pointer_offset;
9935 gcc_assert (from == FRAME_POINTER_REGNUM);
9936 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9940 /* In a dynamically-aligned function, we can't know the offset from
9941 stack pointer to frame pointer, so we must ensure that setjmp
9942 eliminates fp against the hard fp (%ebp) rather than trying to
9943 index from %esp up to the top of the frame across a gap that is
9944 of unknown (at compile-time) size. */
9946 ix86_builtin_setjmp_frame_value (void)
9948 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9951 /* When using -fsplit-stack, the allocation routines set a field in
9952 the TCB to the bottom of the stack plus this much space, measured
9955 #define SPLIT_STACK_AVAILABLE 256
9957 /* Fill structure ix86_frame about frame of currently computed function.
   Lays out, from the CFA downwards: return address, optional static
   chain and saved frame pointer, GP register save area, SSE register
   save area, local variables, and the outgoing argument area.  */
9960 ix86_compute_frame_layout (struct ix86_frame *frame)
9962 unsigned HOST_WIDE_INT stack_alignment_needed;
9963 HOST_WIDE_INT offset;
9964 unsigned HOST_WIDE_INT preferred_alignment;
9965 HOST_WIDE_INT size = get_frame_size ();
9966 HOST_WIDE_INT to_allocate;
9968 frame->nregs = ix86_nsaved_regs ();
9969 frame->nsseregs = ix86_nsaved_sseregs ();
9971 /* 64-bit MS ABI seems to require stack alignment to be always 16 except for
9972 function prologues and leaf. */
9973 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9974 && (!crtl->is_leaf || cfun->calls_alloca != 0
9975 || ix86_current_function_calls_tls_descriptor))
9977 crtl->preferred_stack_boundary = 128;
9978 crtl->stack_alignment_needed = 128;
9980 /* preferred_stack_boundary is never updated for call
9981 expanded from tls descriptor. Update it here. We don't update it in
9982 expand stage because according to the comments before
9983 ix86_current_function_calls_tls_descriptor, tls calls may be optimized
9985 else if (ix86_current_function_calls_tls_descriptor
9986 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9988 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9989 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9990 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
/* Convert bit alignments to bytes for the offset arithmetic below.  */
9993 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9994 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9996 gcc_assert (!size || stack_alignment_needed);
9997 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9998 gcc_assert (preferred_alignment <= stack_alignment_needed);
10000 /* For SEH we have to limit the amount of code movement into the prologue.
10001 At present we do this via a BLOCKAGE, at which point there's very little
10002 scheduling that can be done, which means that there's very little point
10003 in doing anything except PUSHs. */
10005 cfun->machine->use_fast_prologue_epilogue = false;
10007 /* During reload iteration the amount of registers saved can change.
10008 Recompute the value as needed. Do not recompute when amount of registers
10009 didn't change as reload does multiple calls to the function and does not
10010 expect the decision to change within single iteration. */
10011 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
10012 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
10014 int count = frame->nregs;
10015 struct cgraph_node *node = cgraph_node::get (current_function_decl);
10017 cfun->machine->use_fast_prologue_epilogue_nregs = count;
10019 /* The fast prologue uses move instead of push to save registers. This
10020 is significantly longer, but also executes faster as modern hardware
10021 can execute the moves in parallel, but can't do that for push/pop.
10023 Be careful about choosing what prologue to emit: When function takes
10024 many instructions to execute we may use slow version as well as in
10025 case function is known to be outside hot spot (this is known with
10026 feedback only). Weight the size of function by number of registers
10027 to save as it is cheap to use one or two push instructions but very
10028 slow to use many of them. */
10030 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
10031 if (node->frequency < NODE_FREQUENCY_NORMAL
10032 || (flag_branch_probabilities
10033 && node->frequency < NODE_FREQUENCY_HOT))
10034 cfun->machine->use_fast_prologue_epilogue = false;
10036 cfun->machine->use_fast_prologue_epilogue
10037 = !expensive_function_p (count);
10040 frame->save_regs_using_mov
10041 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
10042 /* If static stack checking is enabled and done with probes,
10043 the registers need to be saved before allocating the frame. */
10044 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
10046 /* Skip return address. */
10047 offset = UNITS_PER_WORD;
10049 /* Skip pushed static chain. */
10050 if (ix86_static_chain_on_stack)
10051 offset += UNITS_PER_WORD;
10053 /* Skip saved base pointer. */
10054 if (frame_pointer_needed)
10055 offset += UNITS_PER_WORD;
10056 frame->hfp_save_offset = offset;
10058 /* The traditional frame pointer location is at the top of the frame. */
10059 frame->hard_frame_pointer_offset = offset;
10061 /* Register save area */
10062 offset += frame->nregs * UNITS_PER_WORD;
10063 frame->reg_save_offset = offset;
10065 /* On SEH target, registers are pushed just before the frame pointer
10068 frame->hard_frame_pointer_offset = offset;
10070 /* Align and set SSE register save area. */
10071 if (frame->nsseregs)
10073 /* The only ABI that has saved SSE registers (Win64) also has a
10074 16-byte aligned default stack, and thus we don't need to be
10075 within the re-aligned local stack frame to save them. */
10076 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
10077 offset = (offset + 16 - 1) & -16;
10078 offset += frame->nsseregs * 16;
10080 frame->sse_reg_save_offset = offset;
10082 /* The re-aligned stack starts here. Values before this point are not
10083 directly comparable with values below this point. In order to make
10084 sure that no value happens to be the same before and after, force
10085 the alignment computation below to add a non-zero value. */
10086 if (stack_realign_fp)
10087 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
10090 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10091 offset += frame->va_arg_size;
10093 /* Align start of frame for local function. */
10094 if (stack_realign_fp
10095 || offset != frame->sse_reg_save_offset
10098 || cfun->calls_alloca
10099 || ix86_current_function_calls_tls_descriptor)
10100 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10102 /* Frame pointer points here. */
10103 frame->frame_pointer_offset = offset;
10107 /* Add outgoing arguments area. Can be skipped if we eliminated
10108 all the function calls as dead code.
10109 Skipping is however impossible when function calls alloca. Alloca
10110 expander assumes that last crtl->outgoing_args_size
10111 of stack frame are unused. */
10112 if (ACCUMULATE_OUTGOING_ARGS
10113 && (!crtl->is_leaf || cfun->calls_alloca
10114 || ix86_current_function_calls_tls_descriptor))
10116 offset += crtl->outgoing_args_size;
10117 frame->outgoing_arguments_size = crtl->outgoing_args_size;
10120 frame->outgoing_arguments_size = 0;
10122 /* Align stack boundary. Only needed if we're calling another function
10123 or using alloca. */
10124 if (!crtl->is_leaf || cfun->calls_alloca
10125 || ix86_current_function_calls_tls_descriptor)
10126 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10128 /* We've reached end of stack frame. */
10129 frame->stack_pointer_offset = offset;
10131 /* Size prologue needs to allocate. */
10132 to_allocate = offset - frame->sse_reg_save_offset;
/* Moves aren't profitable for tiny allocations, and a >2GB adjustment
   cannot be encoded as a 32-bit displacement on x86-64.  */
10134 if ((!to_allocate && frame->nregs <= 1)
10135 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10136 frame->save_regs_using_mov = false;
10138 if (ix86_using_red_zone ()
10139 && crtl->sp_is_unchanging
10141 && !ix86_current_function_calls_tls_descriptor)
10143 frame->red_zone_size = to_allocate;
10144 if (frame->save_regs_using_mov)
10145 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10146 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10147 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10150 frame->red_zone_size = 0;
10151 frame->stack_pointer_offset -= frame->red_zone_size;
10153 /* The SEH frame pointer location is near the bottom of the frame.
10154 This is enforced by the fact that the difference between the
10155 stack pointer and the frame pointer is limited to 240 bytes in
10156 the unwind data structure. */
10159 HOST_WIDE_INT diff;
10161 /* If we can leave the frame pointer where it is, do so. Also, returns
10162 the establisher frame for __builtin_frame_address (0). */
10163 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10164 if (diff <= SEH_MAX_FRAME_SIZE
10165 && (diff > 240 || (diff & 15) != 0)
10166 && !crtl->accesses_prior_frames)
10168 /* Ideally we'd determine what portion of the local stack frame
10169 (within the constraint of the lowest 240) is most heavily used.
10170 But without that complication, simply bias the frame pointer
10171 by 128 bytes so as to maximize the amount of the local stack
10172 frame that is addressable with 8-bit offsets. */
10173 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
10178 /* This is semi-inlined memory_address_length, but simplified
10179 since we know that we're always dealing with reg+offset, and
10180 to avoid having to create and discard all that rtl.
   Returns the encoding length in bytes for addressing REGNO+OFFSET.  */
10183 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10189 /* EBP and R13 cannot be encoded without an offset. */
10190 len = (regno == BP_REG || regno == R13_REG);
/* An 8-bit displacement is one byte; larger ones take four.  */
10192 else if (IN_RANGE (offset, -128, 127))
10195 /* ESP and R12 must be encoded with a SIB byte. */
10196 if (regno == SP_REG || regno == R12_REG)
10202 /* Return an RTX that points to CFA_OFFSET within the stack frame.
10203 The valid base registers are taken from CFUN->MACHINE->FS. */
10206 choose_baseaddr (HOST_WIDE_INT cfa_offset)
10208 const struct machine_function *m = cfun->machine;
10209 rtx base_reg = NULL;
10210 HOST_WIDE_INT base_offset = 0;
/* Fast path: when optimizing for speed, prefer whichever valid base
   gives the scheduler the most freedom, regardless of encoding size.  */
10212 if (m->use_fast_prologue_epilogue)
10214 /* Choose the base register most likely to allow the most scheduling
10215 opportunities. Generally FP is valid throughout the function,
10216 while DRAP must be reloaded within the epilogue. But choose either
10217 over the SP due to increased encoding size. */
10219 if (m->fs.fp_valid)
10221 base_reg = hard_frame_pointer_rtx;
10222 base_offset = m->fs.fp_offset - cfa_offset;
10224 else if (m->fs.drap_valid)
10226 base_reg = crtl->drap_reg;
10227 base_offset = 0 - cfa_offset;
10229 else if (m->fs.sp_valid)
10231 base_reg = stack_pointer_rtx;
10232 base_offset = m->fs.sp_offset - cfa_offset;
/* Size path: pick the base register yielding the shortest encoding.  */
10237 HOST_WIDE_INT toffset;
10238 int len = 16, tlen;
10240 /* Choose the base register with the smallest address encoding.
10241 With a tie, choose FP > DRAP > SP. */
10242 if (m->fs.sp_valid)
10244 base_reg = stack_pointer_rtx;
10245 base_offset = m->fs.sp_offset - cfa_offset;
10246 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10248 if (m->fs.drap_valid)
10250 toffset = 0 - cfa_offset;
10251 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
10254 base_reg = crtl->drap_reg;
10255 base_offset = toffset;
10259 if (m->fs.fp_valid)
10261 toffset = m->fs.fp_offset - cfa_offset;
10262 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10265 base_reg = hard_frame_pointer_rtx;
10266 base_offset = toffset;
10271 gcc_assert (base_reg != NULL);
10273 return plus_constant (Pmode, base_reg, base_offset);
10276 /* Emit code to save registers in the prologue.  Uses push insns,
   iterating from the highest GP regno down, and marks each push
   frame-related for the unwinder.  */
10279 ix86_emit_save_regs (void)
10281 unsigned int regno;
10284 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10285 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10287 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10288 RTX_FRAME_RELATED_P (insn) = 1;
10292 /* Emit a single register save at CFA - CFA_OFFSET.  MODE selects the
   save width (word_mode for GP regs, V4SFmode for SSE regs) and the
   appropriate unwind notes are attached when needed.  */
10295 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10296 HOST_WIDE_INT cfa_offset)
10298 struct machine_function *m = cfun->machine;
10299 rtx reg = gen_rtx_REG (mode, regno);
10300 rtx mem, addr, base, insn;
10302 addr = choose_baseaddr (cfa_offset);
10303 mem = gen_frame_mem (mode, addr);
10305 /* For SSE saves, we need to indicate the 128-bit alignment. */
10306 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
10308 insn = emit_move_insn (mem, reg);
10309 RTX_FRAME_RELATED_P (insn) = 1;
/* Strip a reg+offset address down to its base register.  */
10312 if (GET_CODE (base) == PLUS)
10313 base = XEXP (base, 0);
10314 gcc_checking_assert (REG_P (base));
10316 /* When saving registers into a re-aligned local stack frame, avoid
10317 any tricky guessing by dwarf2out. */
10318 if (m->fs.realigned)
10320 gcc_checking_assert (stack_realign_drap);
10322 if (regno == REGNO (crtl->drap_reg))
10324 /* A bit of a hack. We force the DRAP register to be saved in
10325 the re-aligned stack frame, which provides us with a copy
10326 of the CFA that will last past the prologue. Install it. */
10327 gcc_checking_assert (cfun->machine->fs.fp_valid);
10328 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10329 cfun->machine->fs.fp_offset - cfa_offset);
10330 mem = gen_rtx_MEM (mode, addr);
10331 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10335 /* The frame pointer is a stable reference within the
10336 aligned frame. Use it. */
10337 gcc_checking_assert (cfun->machine->fs.fp_valid);
10338 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10339 cfun->machine->fs.fp_offset - cfa_offset);
10340 mem = gen_rtx_MEM (mode, addr);
10341 add_reg_note (insn, REG_CFA_EXPRESSION,
10342 gen_rtx_SET (VOIDmode, mem, reg));
10346 /* The memory may not be relative to the current CFA register,
10347 which means that we may need to generate a new pattern for
10348 use by the unwind info. */
10349 else if (base != m->fs.cfa_reg)
10351 addr = plus_constant (Pmode, m->fs.cfa_reg,
10352 m->fs.cfa_offset - cfa_offset);
10353 mem = gen_rtx_MEM (mode, addr);
10354 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
10358 /* Emit code to save registers using MOV insns.
10359 First register is stored at CFA - CFA_OFFSET; successive saves go
   one word further from the CFA.  */
10361 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10363 unsigned int regno;
10365 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10366 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10368 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10369 cfa_offset -= UNITS_PER_WORD;
10373 /* Emit code to save SSE registers using MOV insns.
10374 First register is stored at CFA - CFA_OFFSET; saves are 16 bytes
   wide (V4SFmode).  */
10376 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10378 unsigned int regno;
10380 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10381 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10383 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
/* GC-protected list of pending REG_CFA_RESTORE notes awaiting the next
   stack-manipulation insn.  */
10388 static GTY(()) rtx queued_cfa_restores;
10390 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
10391 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
10392 Don't add the note if the previously saved value will be left untouched
10393 within stack red-zone till return, as unwinders can find the same value
10394 in the register and on the stack. */
10397 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
/* Inside the red zone the saved slot stays valid to the end of the
   function, so no restore note is necessary.  */
10399 if (!crtl->shrink_wrapped
10400 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10405 add_reg_note (insn, REG_CFA_RESTORE, reg);
10406 RTX_FRAME_RELATED_P (insn) = 1;
/* No insn to attach to yet: queue the note for later.  */
10409 queued_cfa_restores
10410 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10413 /* Add queued REG_CFA_RESTORE notes if any to INSN, splicing the whole
   queue onto INSN's note list and clearing the queue.  */
10416 ix86_add_queued_cfa_restore_notes (rtx insn)
10419 if (!queued_cfa_restores)
/* Find the tail of the queued list so it can be chained in front of
   INSN's existing notes.  */
10421 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10423 XEXP (last, 1) = REG_NOTES (insn);
10424 REG_NOTES (insn) = queued_cfa_restores;
10425 queued_cfa_restores = NULL_RTX;
10426 RTX_FRAME_RELATED_P (insn) = 1;
10429 /* Expand prologue or epilogue stack adjustment.
10430 The pattern exists to put a dependency on all ebp-based memory accesses.
10431 STYLE should be negative if instructions should be marked as frame related,
10432 zero if %r11 register is live and cannot be freely used and positive
   otherwise.  SET_CFA requests that the CFA tracking state be updated.  */
10436 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10437 int style, bool set_cfa)
10439 struct machine_function *m = cfun->machine;
10441 bool add_frame_related_expr = false;
10443 if (Pmode == SImode)
10444 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10445 else if (x86_64_immediate_operand (offset, DImode))
10446 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
/* The offset does not fit a 32-bit immediate: load it into a scratch
   register first.  */
10450 /* r11 is used by indirect sibcall return as well, set before the
10451 epilogue and used after the epilogue. */
10453 tmp = gen_rtx_REG (DImode, R11_REG);
10456 gcc_assert (src != hard_frame_pointer_rtx
10457 && dest != hard_frame_pointer_rtx);
10458 tmp = hard_frame_pointer_rtx;
10460 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10462 add_frame_related_expr = true;
10464 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10467 insn = emit_insn (insn);
10469 ix86_add_queued_cfa_restore_notes (insn);
/* Record the CFA movement for the unwinder when requested.  */
10475 gcc_assert (m->fs.cfa_reg == src);
10476 m->fs.cfa_offset += INTVAL (offset);
10477 m->fs.cfa_reg = dest;
10479 r = gen_rtx_PLUS (Pmode, src, offset);
10480 r = gen_rtx_SET (VOIDmode, dest, r);
10481 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10482 RTX_FRAME_RELATED_P (insn) = 1;
10484 else if (style < 0)
10486 RTX_FRAME_RELATED_P (insn) = 1;
10487 if (add_frame_related_expr)
10489 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10490 r = gen_rtx_SET (VOIDmode, dest, r);
10491 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
/* Keep the machine-specific frame state in sync with the new SP.  */
10495 if (dest == stack_pointer_rtx)
10497 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10498 bool valid = m->fs.sp_valid;
10500 if (src == hard_frame_pointer_rtx)
10502 valid = m->fs.fp_valid;
10503 ooffset = m->fs.fp_offset;
10505 else if (src == crtl->drap_reg)
10507 valid = m->fs.drap_valid;
10512 /* Else there are two possibilities: SP itself, which we set
10513 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
10514 taken care of this by hand along the eh_return path. */
10515 gcc_checking_assert (src == stack_pointer_rtx
10516 || offset == const0_rtx);
10519 m->fs.sp_offset = ooffset - INTVAL (offset);
10520 m->fs.sp_valid = valid;
10524 /* Find an available register to be used as dynamic realign argument
10525 pointer register. Such a register will be written in prologue and
10526 used in begin of body, so it must not be
10527 1. parameter passing register.
10529 We reuse static-chain register if it is available. Otherwise, we
10530 use DI for i386 and R13 for x86-64. We chose R13 since it has
10533 Return: the regno of chosen register. */
10535 static unsigned int
10536 find_drap_reg (void)
10538 tree decl = cfun->decl;
10542 /* Use R13 for nested function or function need static chain.
10543 Since function with tail call may use any caller-saved
10544 registers in epilogue, DRAP must not use caller-saved
10545 register in such case. */
10546 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10553 /* Use DI for nested function or function need static chain.
10554 Since function with tail call may use any caller-saved
10555 registers in epilogue, DRAP must not use caller-saved
10556 register in such case. */
10557 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10560 /* Reuse static chain register if it isn't used for parameter
10562 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10564 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10565 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10572 /* Return minimum incoming stack alignment, in bits.  SIBCALL indicates
   whether the value is being computed for a sibcall check.  */
10574 static unsigned int
10575 ix86_minimum_incoming_stack_boundary (bool sibcall)
10577 unsigned int incoming_stack_boundary;
10579 /* Prefer the one specified at command line. */
10580 if (ix86_user_incoming_stack_boundary)
10581 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10582 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
10583 if -mstackrealign is used, it isn't used for sibcall check and
10584 estimated stack alignment is 128bit. */
10587 && ix86_force_align_arg_pointer
10588 && crtl->stack_alignment_estimated == 128)
10589 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10591 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10593 /* Incoming stack alignment can be changed on individual functions
10594 via force_align_arg_pointer attribute. We use the smallest
10595 incoming stack boundary. */
10596 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10597 && lookup_attribute (ix86_force_align_arg_pointer_string,
10598 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10599 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10601 /* The incoming stack frame has to be aligned at least at
10602 parm_stack_boundary. */
10603 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10604 incoming_stack_boundary = crtl->parm_stack_boundary;
10606 /* Stack at entrance of main is aligned by runtime. We use the
10607 smallest incoming stack boundary. */
10608 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10609 && DECL_NAME (current_function_decl)
10610 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10611 && DECL_FILE_SCOPE_P (current_function_decl))
10612 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10614 return incoming_stack_boundary;
10617 /* Update incoming stack boundary and estimated stack alignment. */
10620 ix86_update_stack_boundary (void)
10622 ix86_incoming_stack_boundary
10623 = ix86_minimum_incoming_stack_boundary (false);
10625 /* x86_64 vararg needs 16byte stack alignment for register save
10629 && crtl->stack_alignment_estimated < 128)
10630 crtl->stack_alignment_estimated = 128;
10633 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10634 needed or an rtx for DRAP otherwise. */
10637 ix86_get_drap_rtx (void)
10639 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10640 crtl->need_drap = true;
10642 if (stack_realign_drap)
10644 /* Assign DRAP to vDRAP and returns vDRAP */
10645 unsigned int regno = find_drap_reg ();
10648 rtx_insn *seq, *insn;
10650 arg_ptr = gen_rtx_REG (Pmode, regno);
10651 crtl->drap_reg = arg_ptr;
/* Copy the hard DRAP into a pseudo right after function entry so the
   body can reference it freely.  */
10654 drap_vreg = copy_to_reg (arg_ptr);
10655 seq = get_insns ();
10658 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10661 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10662 RTX_FRAME_RELATED_P (insn) = 1;
10670 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10673 ix86_internal_arg_pointer (void)
10675 return virtual_incoming_args_rtx;
/* Descriptor for a temporary register obtained during prologue emission.  */
10678 struct scratch_reg {
10683 /* Return a short-lived scratch register for use on function entry.
10684 In 32-bit mode, it is valid only after the registers are saved
10685 in the prologue. This register must be released by means of
10686 release_scratch_register_on_entry once it is dead. */
10689 get_scratch_register_on_entry (struct scratch_reg *sr)
10697 /* We always use R11 in 64-bit mode. */
/* 32-bit mode: pick a register not used for argument passing, the
   static chain, or DRAP under the current calling convention.  */
10702 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10704 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10706 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10707 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10708 int regparm = ix86_function_regparm (fntype, decl);
10710 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10712 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10713 for the static chain register. */
10714 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10715 && drap_regno != AX_REG)
10717 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10718 for the static chain register. */
10719 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10721 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10723 /* ecx is the static chain register. */
10724 else if (regparm < 3 && !fastcall_p && !thiscall_p
10726 && drap_regno != CX_REG)
10728 else if (ix86_save_reg (BX_REG, true))
10730 /* esi is the static chain register. */
10731 else if (!(regparm == 3 && static_chain_p)
10732 && ix86_save_reg (SI_REG, true))
10734 else if (ix86_save_reg (DI_REG, true))
10738 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10743 sr->reg = gen_rtx_REG (Pmode, regno);
/* If the chosen register was live, spill it with a frame-related push.  */
10746 rtx insn = emit_insn (gen_push (sr->reg));
10747 RTX_FRAME_RELATED_P (insn) = 1;
10751 /* Release a scratch register obtained from the preceding function.
   Pops the saved value back if the register had been spilled.  */
10754 release_scratch_register_on_entry (struct scratch_reg *sr)
10758 struct machine_function *m = cfun->machine;
10759 rtx x, insn = emit_insn (gen_pop (sr->reg));
10761 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10762 RTX_FRAME_RELATED_P (insn) = 1;
10763 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10764 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10765 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10766 m->fs.sp_offset -= UNITS_PER_WORD;
10770 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10772 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10775 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10777 /* We skip the probe for the first interval + a small dope of 4 words and
10778 probe that many bytes past the specified size to maintain a protection
10779 area at the bottom of the stack. */
10780 const int dope = 4 * UNITS_PER_WORD;
10781 rtx size_rtx = GEN_INT (size), last;
10783 /* See if we have a constant small number of probes to generate. If so,
10784 that's the easy case. The run-time loop is made up of 11 insns in the
10785 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10786 for n # of intervals. */
10787 if (size <= 5 * PROBE_INTERVAL)
10789 HOST_WIDE_INT i, adjust;
10790 bool first_probe = true;
10792 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10793 values of N from 1 until it exceeds SIZE. If only one probe is
10794 needed, this will not generate any code. Then adjust and probe
10795 to PROBE_INTERVAL + SIZE. */
10796 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10800 adjust = 2 * PROBE_INTERVAL + dope;
10801 first_probe = false;
10804 adjust = PROBE_INTERVAL;
10806 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10807 plus_constant (Pmode, stack_pointer_rtx,
10809 emit_stack_probe (stack_pointer_rtx);
10813 adjust = size + PROBE_INTERVAL + dope;
10815 adjust = size + PROBE_INTERVAL - i;
10817 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10818 plus_constant (Pmode, stack_pointer_rtx,
10820 emit_stack_probe (stack_pointer_rtx);
10822 /* Adjust back to account for the additional first interval. */
10823 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10824 plus_constant (Pmode, stack_pointer_rtx,
10825 PROBE_INTERVAL + dope)));
10828 /* Otherwise, do the same as above, but in a loop. Note that we must be
10829 extra careful with variables wrapping around because we might be at
10830 the very top (or the very bottom) of the address space and we have
10831 to be able to handle this case properly; in particular, we use an
10832 equality test for the loop condition. */
10835 HOST_WIDE_INT rounded_size;
10836 struct scratch_reg sr;
10838 get_scratch_register_on_entry (&sr);
10841 /* Step 1: round SIZE to the previous multiple of the interval. */
10843 rounded_size = size & -PROBE_INTERVAL;
10846 /* Step 2: compute initial and final value of the loop counter. */
10848 /* SP = SP_0 + PROBE_INTERVAL. */
10849 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10850 plus_constant (Pmode, stack_pointer_rtx,
10851 - (PROBE_INTERVAL + dope))));
10853 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10854 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10855 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10856 gen_rtx_PLUS (Pmode, sr.reg,
10857 stack_pointer_rtx)));
10860 /* Step 3: the loop
10862 while (SP != LAST_ADDR)
10864 SP = SP + PROBE_INTERVAL
10868 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10869 values of N from 1 until it is equal to ROUNDED_SIZE. */
10871 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10874 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10875 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10877 if (size != rounded_size)
10879 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10880 plus_constant (Pmode, stack_pointer_rtx,
10881 rounded_size - size)));
10882 emit_stack_probe (stack_pointer_rtx);
10885 /* Adjust back to account for the additional first interval. */
10886 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10887 plus_constant (Pmode, stack_pointer_rtx,
10888 PROBE_INTERVAL + dope)));
10890 release_scratch_register_on_entry (&sr);
10893 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10895 /* Even if the stack pointer isn't the CFA register, we need to correctly
10896 describe the adjustments made to it, in particular differentiate the
10897 frame-related ones from the frame-unrelated ones. */
10900 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10901 XVECEXP (expr, 0, 0)
10902 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10903 plus_constant (Pmode, stack_pointer_rtx, -size));
10904 XVECEXP (expr, 0, 1)
10905 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10906 plus_constant (Pmode, stack_pointer_rtx,
10907 PROBE_INTERVAL + dope + size));
10908 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10909 RTX_FRAME_RELATED_P (last) = 1;
10911 cfun->machine->fs.sp_offset += size;
10914 /* Make sure nothing is scheduled before we are done. */
10915 emit_insn (gen_blockage ());
10918 /* Adjust the stack pointer up to REG while probing it. */
/* Emits the assembly text of the moving-SP probe loop: repeatedly
   decrement SP by PROBE_INTERVAL and touch the new stack top (an "or"
   of 0 into the word at SP) until SP equals the last address in REG.
   NOTE(review): the declaration of the xops[] operand array and several
   body lines are missing from this listing — confirm against the full
   source before editing.  */
10921 output_adjust_stack_and_probe (rtx reg)
10923 static int labelno = 0;
10924 char loop_lab[32], end_lab[32];
/* Fresh pair of internal labels for the loop head and its exit.  */
10927 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10928 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10930 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10932 /* Jump to END_LAB if SP == LAST_ADDR. */
10933 xops[0] = stack_pointer_rtx;
10935 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10936 fputs ("\tje\t", asm_out_file);
10937 assemble_name_raw (asm_out_file, end_lab);
10938 fputc ('\n', asm_out_file);
10940 /* SP = SP + PROBE_INTERVAL. */
10941 xops[1] = GEN_INT (PROBE_INTERVAL);
10942 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
/* Probe at the new SP: "or $0, (%esp)" writes the stack word without
   changing its value, forcing the page to be mapped.  */
10945 xops[1] = const0_rtx;
10946 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10948 fprintf (asm_out_file, "\tjmp\t");
10949 assemble_name_raw (asm_out_file, loop_lab);
10950 fputc ('\n', asm_out_file);
10952 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10957 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10958 inclusive. These are offsets from the current stack pointer. */
/* Unlike the adjust-and-probe variant above, this probes the range
   without moving SP: a scratch register walks the offsets instead.
   NOTE(review): several lines (probe offsets, loop braces) are missing
   from this listing.  */
10961 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10963 /* See if we have a constant small number of probes to generate. If so,
10964 that's the easy case. The run-time loop is made up of 7 insns in the
10965 generic case while the compile-time loop is made up of n insns for n #
10967 if (size <= 7 * PROBE_INTERVAL)
10971 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10972 it exceeds SIZE. If only one probe is needed, this will not
10973 generate any code. Then probe at FIRST + SIZE. */
10974 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10975 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
/* Final probe at the very end of the range (FIRST + SIZE).  */
10978 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10982 /* Otherwise, do the same as above, but in a loop. Note that we must be
10983 extra careful with variables wrapping around because we might be at
10984 the very top (or the very bottom) of the address space and we have
10985 to be able to handle this case properly; in particular, we use an
10986 equality test for the loop condition. */
10989 HOST_WIDE_INT rounded_size, last;
10990 struct scratch_reg sr;
10992 get_scratch_register_on_entry (&sr);
10995 /* Step 1: round SIZE to the previous multiple of the interval. */
10997 rounded_size = size & -PROBE_INTERVAL;
11000 /* Step 2: compute initial and final value of the loop counter. */
11002 /* TEST_OFFSET = FIRST. */
/* Offsets are negated so the loop can count up toward zero.  */
11003 emit_move_insn (sr.reg, GEN_INT (-first));
11005 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
11006 last = first + rounded_size;
11009 /* Step 3: the loop
11011 while (TEST_ADDR != LAST_ADDR)
11013 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
11017 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
11018 until it is equal to ROUNDED_SIZE. */
11020 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
11023 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
11024 that SIZE is equal to ROUNDED_SIZE. */
11026 if (size != rounded_size)
11027 emit_stack_probe (plus_constant (Pmode,
11028 gen_rtx_PLUS (Pmode,
11031 rounded_size - size));
11033 release_scratch_register_on_entry (&sr);
11036 /* Make sure nothing is scheduled before we are done. */
/* The blockage keeps the scheduler from hoisting frame accesses above
   the probes.  */
11037 emit_insn (gen_blockage ());
11040 /* Probe a range of stack addresses from REG to END, inclusive. These are
11041 offsets from the current stack pointer. */
/* Assembly-text counterpart of ix86_emit_probe_stack_range's loop: REG
   walks by PROBE_INTERVAL and each iteration probes SP+REG, leaving SP
   itself untouched.  NOTE(review): the xops[] declaration and the
   initial xops[0]/xops[1] setup lines are missing from this listing.  */
11044 output_probe_stack_range (rtx reg, rtx end)
11046 static int labelno = 0;
11047 char loop_lab[32], end_lab[32];
11050 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11051 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11053 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11055 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
11058 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11059 fputs ("\tje\t", asm_out_file);
11060 assemble_name_raw (asm_out_file, end_lab);
11061 fputc ('\n', asm_out_file);
11063 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
11064 xops[1] = GEN_INT (PROBE_INTERVAL);
11065 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11067 /* Probe at TEST_ADDR. */
/* Probe uses an SP-relative base-index address so it stays correct for
   a realigned stack.  */
11068 xops[0] = stack_pointer_rtx;
11070 xops[2] = const0_rtx;
11071 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
11073 fprintf (asm_out_file, "\tjmp\t");
11074 assemble_name_raw (asm_out_file, loop_lab);
11075 fputc ('\n', asm_out_file);
11077 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11082 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
11083 to be generated in correct form. */
/* Runs after reload.  Decides once and for all whether this function
   really needs dynamic stack realignment, records the verdict in
   crtl->stack_realign_needed / stack_realign_finalized, and — when
   realignment turns out to be unnecessary — may also drop the frame
   pointer and the DRAP register entirely.  NOTE(review): the comparison
   that feeds `stack_realign` and the big if-condition's first clauses
   are missing from this listing.  */
11085 ix86_finalize_stack_realign_flags (void)
11087 /* Check if stack realign is really needed after reload, and
11088 stores result in cfun */
11089 unsigned int incoming_stack_boundary
11090 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11091 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
11092 unsigned int stack_realign = (incoming_stack_boundary
11094 ? crtl->max_used_stack_slot_alignment
11095 : crtl->stack_alignment_needed));
11097 if (crtl->stack_realign_finalized)
11099 /* After stack_realign_needed is finalized, we can't no longer
11101 gcc_assert (crtl->stack_realign_needed == stack_realign);
11105 /* If the only reason for frame_pointer_needed is that we conservatively
11106 assumed stack realignment might be needed, but in the end nothing that
11107 needed the stack alignment had been spilled, clear frame_pointer_needed
11108 and say we don't need stack realignment. */
11110 && frame_pointer_needed
11112 && flag_omit_frame_pointer
11113 && crtl->sp_is_unchanging
11114 && !ix86_current_function_calls_tls_descriptor
11115 && !crtl->accesses_prior_frames
11116 && !cfun->calls_alloca
11117 && !crtl->calls_eh_return
11118 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
11119 && !ix86_frame_pointer_required ()
11120 && get_frame_size () == 0
11121 && ix86_nsaved_sseregs () == 0
11122 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
11124 HARD_REG_SET set_up_by_prologue, prologue_used;
/* Scan every insn: if anything still requires a stack frame given what
   the prologue sets up (SP, ARG pointer, hard FP), keep realignment.  */
11127 CLEAR_HARD_REG_SET (prologue_used);
11128 CLEAR_HARD_REG_SET (set_up_by_prologue);
11129 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11130 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11131 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11132 HARD_FRAME_POINTER_REGNUM);
11133 FOR_EACH_BB_FN (bb, cfun)
11136 FOR_BB_INSNS (bb, insn)
11137 if (NONDEBUG_INSN_P (insn)
11138 && requires_stack_frame_p (insn, prologue_used,
11139 set_up_by_prologue))
11141 crtl->stack_realign_needed = stack_realign;
11142 crtl->stack_realign_finalized = true;
11147 /* If drap has been set, but it actually isn't live at the start
11148 of the function, there is no reason to set it up. */
11149 if (crtl->drap_reg)
11151 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11152 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11154 crtl->drap_reg = NULL_RTX;
11155 crtl->need_drap = false;
11159 cfun->machine->no_drap_save_restore = true;
/* Realignment proved unnecessary: drop FP, clamp the recorded stack
   alignments to the incoming boundary, and rebuild DF info since
   register liveness has changed.  */
11161 frame_pointer_needed = false;
11162 stack_realign = false;
11163 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11164 crtl->stack_alignment_needed = incoming_stack_boundary;
11165 crtl->stack_alignment_estimated = incoming_stack_boundary;
11166 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11167 crtl->preferred_stack_boundary = incoming_stack_boundary;
11168 df_finish_pass (true);
11169 df_scan_alloc (NULL);
11171 df_compute_regs_ever_live (true);
11175 crtl->stack_realign_needed = stack_realign;
11176 crtl->stack_realign_finalized = true;
11179 /* Delete SET_GOT right after entry block if it is allocated to reg. */
/* REG is the register the prologue will load the GOT into itself; if
   the first real insn of the function is a PARALLEL whose first element
   is a SET of that same register from UNSPEC_SET_GOT, the earlier copy
   is redundant and is removed here.  */
11182 ix86_elim_entry_set_got (rtx reg)
11184 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11185 rtx_insn *c_insn = BB_HEAD (bb);
/* BB_HEAD may be a note/label; step to the first real insn.  */
11186 if (!NONDEBUG_INSN_P (c_insn))
11187 c_insn = next_nonnote_nondebug_insn (c_insn);
11188 if (c_insn && NONJUMP_INSN_P (c_insn))
11190 rtx pat = PATTERN (c_insn);
11191 if (GET_CODE (pat) == PARALLEL)
11193 rtx vec = XVECEXP (pat, 0, 0);
11194 if (GET_CODE (vec) == SET
11195 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11196 && REGNO (XEXP (vec, 0)) == REGNO (reg))
11197 delete_insn (c_insn);
11202 /* Expand the prologue into a bunch of separate insns. */
/* Main prologue expander.  Emits, in order: ms_hook / static-chain
   special entry code, DRAP setup for stack realignment, frame-pointer
   setup, register saves (push or mov), stack-pointer alignment,
   allocation of the local frame (with optional stack probing), SSE
   register saves, and profiling/SEH bookkeeping.  Throughout, m->fs
   mirrors the CFA/SP/FP state so the correct CFI notes are attached.
   NOTE(review): this listing is missing many lines (braces, several
   declarations such as insn/t/pic, and parts of conditions) — treat
   the visible text as a partial transcript of the real function.  */
11205 ix86_expand_prologue (void)
11207 struct machine_function *m = cfun->machine;
11209 struct ix86_frame frame;
11210 HOST_WIDE_INT allocate;
11211 bool int_registers_saved;
11212 bool sse_registers_saved;
11214 ix86_finalize_stack_realign_flags ();
11216 /* DRAP should not coexist with stack_realign_fp */
11217 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
11219 memset (&m->fs, 0, sizeof (m->fs));
11221 /* Initialize CFA state for before the prologue. */
11222 m->fs.cfa_reg = stack_pointer_rtx;
11223 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11225 /* Track SP offset to the CFA. We continue tracking this after we've
11226 swapped the CFA register away from SP. In the case of re-alignment
11227 this is fudged; we're interested to offsets within the local frame. */
11228 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11229 m->fs.sp_valid = true;
11231 ix86_compute_frame_layout (&frame);
11233 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11235 /* We should have already generated an error for any use of
11236 ms_hook on a nested function. */
11237 gcc_checking_assert (!ix86_static_chain_on_stack);
11239 /* Check if profiling is active and we shall use profiling before
11240 prologue variant. If so sorry. */
11241 if (crtl->profile && flag_fentry != 0)
11242 sorry ("ms_hook_prologue attribute isn%'t compatible "
11243 "with -mfentry for 32-bit")
11245 /* In ix86_asm_output_function_label we emitted:
11246 8b ff movl.s %edi,%edi
11248 8b ec movl.s %esp,%ebp
11250 This matches the hookable function prologue in Win32 API
11251 functions in Microsoft Windows XP Service Pack 2 and newer.
11252 Wine uses this to enable Windows apps to hook the Win32 API
11253 functions provided by Wine.
11255 What that means is that we've already set up the frame pointer. */
11257 if (frame_pointer_needed
11258 && !(crtl->drap_reg && crtl->stack_realign_needed))
11262 /* We've decided to use the frame pointer already set up.
11263 Describe this to the unwinder by pretending that both
11264 push and mov insns happen right here.
11266 Putting the unwind info here at the end of the ms_hook
11267 is done so that we can make absolutely certain we get
11268 the required byte sequence at the start of the function,
11269 rather than relying on an assembler that can produce
11270 the exact encoding required.
11272 However it does mean (in the unpatched case) that we have
11273 a 1 insn window where the asynchronous unwind info is
11274 incorrect. However, if we placed the unwind info at
11275 its correct location we would have incorrect unwind info
11276 in the patched case. Which is probably all moot since
11277 I don't expect Wine generates dwarf2 unwind info for the
11278 system libraries that use this feature. */
11280 insn = emit_insn (gen_blockage ());
11282 push = gen_push (hard_frame_pointer_rtx);
11283 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
11284 stack_pointer_rtx);
11285 RTX_FRAME_RELATED_P (push) = 1;
11286 RTX_FRAME_RELATED_P (mov) = 1;
11288 RTX_FRAME_RELATED_P (insn) = 1;
11289 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11290 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11292 /* Note that gen_push incremented m->fs.cfa_offset, even
11293 though we didn't emit the push insn here. */
11294 m->fs.cfa_reg = hard_frame_pointer_rtx;
11295 m->fs.fp_offset = m->fs.cfa_offset;
11296 m->fs.fp_valid = true;
11300 /* The frame pointer is not needed so pop %ebp again.
11301 This leaves us with a pristine state. */
11302 emit_insn (gen_pop (hard_frame_pointer_rtx));
11306 /* The first insn of a function that accepts its static chain on the
11307 stack is to push the register that would be filled in by a direct
11308 call. This insn will be skipped by the trampoline. */
11309 else if (ix86_static_chain_on_stack)
11311 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
11312 emit_insn (gen_blockage ());
11314 /* We don't want to interpret this push insn as a register save,
11315 only as a stack adjustment. The real copy of the register as
11316 a save will be done later, if needed. */
11317 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11318 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
11319 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11320 RTX_FRAME_RELATED_P (insn) = 1;
11323 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
11324 of DRAP is needed and stack realignment is really needed after reload */
11325 if (stack_realign_drap)
11327 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11329 /* Only need to push parameter pointer reg if it is caller saved. */
11330 if (!call_used_regs[REGNO (crtl->drap_reg)])
11332 /* Push arg pointer reg */
11333 insn = emit_insn (gen_push (crtl->drap_reg));
11334 RTX_FRAME_RELATED_P (insn) = 1;
11337 /* Grab the argument pointer. */
11338 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11339 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11340 RTX_FRAME_RELATED_P (insn) = 1;
/* From here on the DRAP register, not SP, defines the CFA.  */
11341 m->fs.cfa_reg = crtl->drap_reg;
11342 m->fs.cfa_offset = 0;
11344 /* Align the stack. */
11345 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11347 GEN_INT (-align_bytes)));
11348 RTX_FRAME_RELATED_P (insn) = 1;
11350 /* Replicate the return address on the stack so that return
11351 address can be reached via (argp - 1) slot. This is needed
11352 to implement macro RETURN_ADDR_RTX and intrinsic function
11353 expand_builtin_return_addr etc. */
11354 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11355 t = gen_frame_mem (word_mode, t);
11356 insn = emit_insn (gen_push (t));
11357 RTX_FRAME_RELATED_P (insn) = 1;
11359 /* For the purposes of frame and register save area addressing,
11360 we've started over with a new frame. */
11361 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11362 m->fs.realigned = true;
11365 int_registers_saved = (frame.nregs == 0);
11366 sse_registers_saved = (frame.nsseregs == 0);
11368 if (frame_pointer_needed && !m->fs.fp_valid)
11370 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11371 slower on all targets. Also sdb doesn't like it. */
11372 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11373 RTX_FRAME_RELATED_P (insn) = 1;
11375 /* Push registers now, before setting the frame pointer
11377 if (!int_registers_saved
11379 && !frame.save_regs_using_mov)
11381 ix86_emit_save_regs ();
11382 int_registers_saved = true;
11383 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
/* Only set the FP from SP when SP currently points at the FP save
   slot; otherwise the FP is established later, after allocation.  */
11386 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11388 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11389 RTX_FRAME_RELATED_P (insn) = 1;
11391 if (m->fs.cfa_reg == stack_pointer_rtx)
11392 m->fs.cfa_reg = hard_frame_pointer_rtx;
11393 m->fs.fp_offset = m->fs.sp_offset;
11394 m->fs.fp_valid = true;
11398 if (!int_registers_saved)
11400 /* If saving registers via PUSH, do so now. */
11401 if (!frame.save_regs_using_mov)
11403 ix86_emit_save_regs ();
11404 int_registers_saved = true;
11405 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11408 /* When using red zone we may start register saving before allocating
11409 the stack frame saving one cycle of the prologue. However, avoid
11410 doing this if we have to probe the stack; at least on x86_64 the
11411 stack probe can turn into a call that clobbers a red zone location. */
11412 else if (ix86_using_red_zone ()
11413 && (! TARGET_STACK_PROBE
11414 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11416 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11417 int_registers_saved = true;
11421 if (stack_realign_fp)
11423 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11424 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11426 /* The computation of the size of the re-aligned stack frame means
11427 that we must allocate the size of the register save area before
11428 performing the actual alignment. Otherwise we cannot guarantee
11429 that there's enough storage above the realignment point. */
11430 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11431 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11432 GEN_INT (m->fs.sp_offset
11433 - frame.sse_reg_save_offset),
11436 /* Align the stack. */
11437 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11439 GEN_INT (-align_bytes)));
11441 /* For the purposes of register save area addressing, the stack
11442 pointer is no longer valid. As for the value of sp_offset,
11443 see ix86_compute_frame_layout, which we need to match in order
11444 to pass verification of stack_pointer_offset at the end. */
11445 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11446 m->fs.sp_valid = false;
/* ALLOCATE is what remains to be subtracted from SP for locals.  */
11449 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11451 if (flag_stack_usage_info)
11453 /* We start to count from ARG_POINTER. */
11454 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11456 /* If it was realigned, take into account the fake frame. */
11457 if (stack_realign_drap)
11459 if (ix86_static_chain_on_stack)
11460 stack_size += UNITS_PER_WORD;
11462 if (!call_used_regs[REGNO (crtl->drap_reg)])
11463 stack_size += UNITS_PER_WORD;
11465 /* This over-estimates by 1 minimal-stack-alignment-unit but
11466 mitigates that by counting in the new return address slot. */
11467 current_function_dynamic_stack_size
11468 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11471 current_function_static_stack_size = stack_size;
11474 /* On SEH target with very large frame size, allocate an area to save
11475 SSE registers (as the very large allocation won't be described). */
11477 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11478 && !sse_registers_saved)
11480 HOST_WIDE_INT sse_size =
11481 frame.sse_reg_save_offset - frame.reg_save_offset;
11483 gcc_assert (int_registers_saved);
11485 /* No need to do stack checking as the area will be immediately
11487 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11488 GEN_INT (-sse_size), -1,
11489 m->fs.cfa_reg == stack_pointer_rtx);
11490 allocate -= sse_size;
11491 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11492 sse_registers_saved = true;
11495 /* The stack has already been decremented by the instruction calling us
11496 so probe if the size is non-negative to preserve the protection area. */
11497 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11499 /* We expect the registers to be saved when probes are used. */
11500 gcc_assert (int_registers_saved);
11502 if (STACK_CHECK_MOVING_SP)
11504 if (!(crtl->is_leaf && !cfun->calls_alloca
11505 && allocate <= PROBE_INTERVAL))
11507 ix86_adjust_stack_and_probe (allocate);
11513 HOST_WIDE_INT size = allocate;
/* Clamp the probed size on 64-bit so the displacement fits in the
   32-bit immediate of the probe insns.  */
11515 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11516 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11518 if (TARGET_STACK_PROBE)
11520 if (crtl->is_leaf && !cfun->calls_alloca)
11522 if (size > PROBE_INTERVAL)
11523 ix86_emit_probe_stack_range (0, size);
11526 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11530 if (crtl->is_leaf && !cfun->calls_alloca)
11532 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11533 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11534 size - STACK_CHECK_PROTECT);
11537 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
/* Frame allocation proper: small frames get a simple SP subtraction;
   large frames on targets needing probes go through __chkstk-style
   workers using %eax (and %r10 for the 64-bit static chain).  */
11544 else if (!ix86_target_stack_probe ()
11545 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11547 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11548 GEN_INT (-allocate), -1,
11549 m->fs.cfa_reg == stack_pointer_rtx);
11553 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11555 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11556 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11557 bool eax_live = ix86_eax_live_at_start_p ();
11558 bool r10_live = false;
11561 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
/* Preserve a live incoming %eax around its use as the size operand.  */
11565 insn = emit_insn (gen_push (eax));
11566 allocate -= UNITS_PER_WORD;
11567 /* Note that SEH directives need to continue tracking the stack
11568 pointer even after the frame pointer has been set up. */
11569 if (sp_is_cfa_reg || TARGET_SEH)
11572 m->fs.cfa_offset += UNITS_PER_WORD;
11573 RTX_FRAME_RELATED_P (insn) = 1;
11574 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11575 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11576 plus_constant (Pmode, stack_pointer_rtx,
11577 -UNITS_PER_WORD)));
11583 r10 = gen_rtx_REG (Pmode, R10_REG);
11584 insn = emit_insn (gen_push (r10));
11585 allocate -= UNITS_PER_WORD;
11586 if (sp_is_cfa_reg || TARGET_SEH)
11589 m->fs.cfa_offset += UNITS_PER_WORD;
11590 RTX_FRAME_RELATED_P (insn) = 1;
11591 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11592 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11593 plus_constant (Pmode, stack_pointer_rtx,
11594 -UNITS_PER_WORD)));
11598 emit_move_insn (eax, GEN_INT (allocate));
11599 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11601 /* Use the fact that AX still contains ALLOCATE. */
11602 adjust_stack_insn = (Pmode == DImode
11603 ? gen_pro_epilogue_adjust_stack_di_sub
11604 : gen_pro_epilogue_adjust_stack_si_sub);
11606 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11607 stack_pointer_rtx, eax));
11609 if (sp_is_cfa_reg || TARGET_SEH)
11612 m->fs.cfa_offset += allocate;
11613 RTX_FRAME_RELATED_P (insn) = 1;
11614 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11615 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11616 plus_constant (Pmode, stack_pointer_rtx,
11619 m->fs.sp_offset += allocate;
11621 /* Use stack_pointer_rtx for relative addressing so that code
11622 works for realigned stack, too. */
11623 if (r10_live && eax_live)
11625 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11626 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11627 gen_frame_mem (word_mode, t));
11628 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11629 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11630 gen_frame_mem (word_mode, t));
11632 else if (eax_live || r10_live)
11634 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11635 emit_move_insn (gen_rtx_REG (word_mode,
11636 (eax_live ? AX_REG : R10_REG)),
11637 gen_frame_mem (word_mode, t));
11640 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11642 /* If we havn't already set up the frame pointer, do so now. */
11643 if (frame_pointer_needed && !m->fs.fp_valid)
11645 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11646 GEN_INT (frame.stack_pointer_offset
11647 - frame.hard_frame_pointer_offset));
11648 insn = emit_insn (insn);
11649 RTX_FRAME_RELATED_P (insn) = 1;
11650 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11652 if (m->fs.cfa_reg == stack_pointer_rtx)
11653 m->fs.cfa_reg = hard_frame_pointer_rtx;
11654 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11655 m->fs.fp_valid = true;
11658 if (!int_registers_saved)
11659 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11660 if (!sse_registers_saved)
11661 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11663 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
11665 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11667 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11668 insn = emit_insn (gen_set_got (pic));
11669 RTX_FRAME_RELATED_P (insn) = 1;
11670 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11671 emit_insn (gen_prologue_use (pic));
11672 /* Deleting already emmitted SET_GOT if exist and allocated to
11673 REAL_PIC_OFFSET_TABLE_REGNUM. */
11674 ix86_elim_entry_set_got (pic);
11677 if (crtl->drap_reg && !crtl->stack_realign_needed)
11679 /* vDRAP is setup but after reload it turns out stack realign
11680 isn't necessary, here we will emit prologue to setup DRAP
11681 without stack realign adjustment */
11682 t = choose_baseaddr (0);
11683 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11686 /* Prevent instructions from being scheduled into register save push
11687 sequence when access to the redzone area is done through frame pointer.
11688 The offset between the frame pointer and the stack pointer is calculated
11689 relative to the value of the stack pointer at the end of the function
11690 prologue, and moving instructions that access redzone area via frame
11691 pointer inside push sequence violates this assumption. */
11692 if (frame_pointer_needed && frame.red_zone_size)
11693 emit_insn (gen_memory_blockage ());
11695 /* Emit cld instruction if stringops are used in the function. */
11696 if (TARGET_CLD && ix86_current_function_needs_cld)
11697 emit_insn (gen_cld ());
11699 /* SEH requires that the prologue end within 256 bytes of the start of
11700 the function. Prevent instruction schedules that would extend that.
11701 Further, prevent alloca modifications to the stack pointer from being
11702 combined with prologue modifications. */
11704 emit_insn (gen_prologue_use (stack_pointer_rtx));
11707 /* Emit code to restore REG using a POP insn. */
/* Also maintains the epilogue's CFA bookkeeping: popping the DRAP
   register, the CFA-defining stack pointer, or the frame pointer each
   requires a different REG_CFA_* note so the unwinder stays in sync.  */
11710 ix86_emit_restore_reg_using_pop (rtx reg)
11712 struct machine_function *m = cfun->machine;
11713 rtx insn = emit_insn (gen_pop (reg));
11715 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11716 m->fs.sp_offset -= UNITS_PER_WORD;
11718 if (m->fs.cfa_reg == crtl->drap_reg
11719 && REGNO (reg) == REGNO (crtl->drap_reg))
11721 /* Previously we'd represented the CFA as an expression
11722 like *(%ebp - 8). We've just popped that value from
11723 the stack, which means we need to reset the CFA to
11724 the drap register. This will remain until we restore
11725 the stack pointer. */
11726 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11727 RTX_FRAME_RELATED_P (insn) = 1;
11729 /* This means that the DRAP register is valid for addressing too. */
11730 m->fs.drap_valid = true;
11734 if (m->fs.cfa_reg == stack_pointer_rtx)
11736 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11737 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11738 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11739 RTX_FRAME_RELATED_P (insn) = 1;
11741 m->fs.cfa_offset -= UNITS_PER_WORD;
11744 /* When the frame pointer is the CFA, and we pop it, we are
11745 swapping back to the stack pointer as the CFA. This happens
11746 for stack frames that don't allocate other data, so we assume
11747 the stack pointer is now pointing at the return address, i.e.
11748 the function entry state, which makes the offset be 1 word. */
11749 if (reg == hard_frame_pointer_rtx)
11751 m->fs.fp_valid = false;
11752 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11754 m->fs.cfa_reg = stack_pointer_rtx;
11755 m->fs.cfa_offset -= UNITS_PER_WORD;
11757 add_reg_note (insn, REG_CFA_DEF_CFA,
11758 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11759 GEN_INT (m->fs.cfa_offset)));
11760 RTX_FRAME_RELATED_P (insn) = 1;
11765 /* Emit code to restore saved registers using POP insns. */
/* Pops every saved general-purpose register (SSE registers are
   restored elsewhere, via moves).  Relies on ix86_save_reg to decide
   which hard regs were saved by the prologue.  */
11768 ix86_emit_restore_regs_using_pop (void)
11770 unsigned int regno;
11772 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11773 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11774 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11777 /* Emit code and notes for the LEAVE instruction. */
/* LEAVE restores SP from FP and pops the saved FP in one insn, so the
   frame-state tracking must flip sp/fp validity together and, when FP
   was the CFA, move the CFA back to the stack pointer.  */
11780 ix86_emit_leave (void)
11782 struct machine_function *m = cfun->machine;
11783 rtx insn = emit_insn (ix86_gen_leave ());
11785 ix86_add_queued_cfa_restore_notes (insn);
11787 gcc_assert (m->fs.fp_valid);
/* After LEAVE, SP points one word above the old FP save slot.  */
11788 m->fs.sp_valid = true;
11789 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11790 m->fs.fp_valid = false;
11792 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11794 m->fs.cfa_reg = stack_pointer_rtx;
11795 m->fs.cfa_offset = m->fs.sp_offset;
11797 add_reg_note (insn, REG_CFA_DEF_CFA,
11798 plus_constant (Pmode, stack_pointer_rtx,
11800 RTX_FRAME_RELATED_P (insn) = 1;
11802 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11806 /* Emit code to restore saved registers using MOV insns.
11807 First register is restored from CFA - CFA_OFFSET. */
/* MAYBE_EH_RETURN selects whether registers saved only for eh_return
   are included.  Each restored register gets a CFA-restore note;
   CFA_OFFSET steps down one word per register.  NOTE(review): the
   declarations of mem/insn inside the loop are missing from this
   listing.  */
11809 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11810 bool maybe_eh_return)
11812 struct machine_function *m = cfun->machine;
11813 unsigned int regno;
11815 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11816 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11818 rtx reg = gen_rtx_REG (word_mode, regno);
/* choose_baseaddr picks whichever of SP/FP/DRAP is currently valid.  */
11821 mem = choose_baseaddr (cfa_offset);
11822 mem = gen_frame_mem (word_mode, mem);
11823 insn = emit_move_insn (reg, mem);
11825 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11827 /* Previously we'd represented the CFA as an expression
11828 like *(%ebp - 8). We've just popped that value from
11829 the stack, which means we need to reset the CFA to
11830 the drap register. This will remain until we restore
11831 the stack pointer. */
11832 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11833 RTX_FRAME_RELATED_P (insn) = 1;
11835 /* This means that the DRAP register is valid for addressing. */
11836 m->fs.drap_valid = true;
11839 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11841 cfa_offset -= UNITS_PER_WORD;
11845 /* Emit code to restore saved registers using MOV insns.
11846 First register is restored from CFA - CFA_OFFSET. */
/* SSE counterpart of the function above: restores each saved SSE
   register as a V4SF load from a 128-bit-aligned frame slot.
   NOTE(review): the per-iteration step of cfa_offset is not visible in
   this listing — presumably 16 bytes per register; confirm against the
   full source.  */
11848 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11849 bool maybe_eh_return)
11851 unsigned int regno;
11853 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11854 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11856 rtx reg = gen_rtx_REG (V4SFmode, regno);
11859 mem = choose_baseaddr (cfa_offset);
11860 mem = gen_rtx_MEM (V4SFmode, mem);
/* The save area is 16-byte aligned, so the load can use aligned moves.  */
11861 set_mem_align (mem, 128);
11862 emit_move_insn (reg, mem);
11864 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11870 /* Restore function stack, frame, and registers. */
/* NOTE(review): many interior lines of this function are elided in this
   excerpt (braces, some conditions and declarations).  Comments below
   describe only what the visible lines establish.  STYLE selects the
   kind of return sequence (style == 2 is the eh_return path, style < 0
   presumably marks a sibcall epilogue -- TODO confirm against full file).  */
11873 ix86_expand_epilogue (int style)
11875 struct machine_function *m = cfun->machine;
/* Snapshot the frame state; it is restored before returning so that a
   subsequent epilogue expansion starts from the prologue's state.  */
11876 struct machine_frame_state frame_state_save = m->fs;
11877 struct ix86_frame frame;
11878 bool restore_regs_via_mov;
11881 ix86_finalize_stack_realign_flags ();
11882 ix86_compute_frame_layout (&frame);
/* Establish which of SP/FP is a usable base at function end and check
   the tracked offsets against the recomputed layout.  */
11884 m->fs.sp_valid = (!frame_pointer_needed
11885 || (crtl->sp_is_unchanging
11886 && !stack_realign_fp));
11887 gcc_assert (!m->fs.sp_valid
11888 || m->fs.sp_offset == frame.stack_pointer_offset);
11890 /* The FP must be valid if the frame pointer is present. */
11891 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11892 gcc_assert (!m->fs.fp_valid
11893 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11895 /* We must have *some* valid pointer to the stack frame. */
11896 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11898 /* The DRAP is never valid at this point. */
11899 gcc_assert (!m->fs.drap_valid);
11901 /* See the comment about red zone and frame
11902 pointer usage in ix86_expand_prologue. */
11903 if (frame_pointer_needed && frame.red_zone_size)
11904 emit_insn (gen_memory_blockage ());
11906 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11907 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11909 /* Determine the CFA offset of the end of the red-zone. */
11910 m->fs.red_zone_offset = 0;
11911 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11913 /* The red-zone begins below the return address. */
11914 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11916 /* When the register save area is in the aligned portion of
11917 the stack, determine the maximum runtime displacement that
11918 matches up with the aligned frame. */
11919 if (stack_realign_drap)
11920 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11924 /* Special care must be taken for the normal return case of a function
11925 using eh_return: the eax and edx registers are marked as saved, but
11926 not restored along this path. Adjust the save location to match. */
11927 if (crtl->calls_eh_return && style != 2)
11928 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
/* Decide between restoring registers with MOV loads or POP insns;
   the chain of conditions below picks MOV whenever pops are either
   impossible (eh_return, invalid SP) or slower than moves.  */
11930 /* EH_RETURN requires the use of moves to function properly. */
11931 if (crtl->calls_eh_return)
11932 restore_regs_via_mov = true;
11933 /* SEH requires the use of pops to identify the epilogue. */
11934 else if (TARGET_SEH)
11935 restore_regs_via_mov = false;
11936 /* If we're only restoring one register and sp is not valid then
11937 using a move instruction to restore the register since it's
11938 less work than reloading sp and popping the register. */
11939 else if (!m->fs.sp_valid && frame.nregs <= 1)
11940 restore_regs_via_mov = true;
11941 else if (TARGET_EPILOGUE_USING_MOVE
11942 && cfun->machine->use_fast_prologue_epilogue
11943 && (frame.nregs > 1
11944 || m->fs.sp_offset != frame.reg_save_offset))
11945 restore_regs_via_mov = true;
11946 else if (frame_pointer_needed
11948 && m->fs.sp_offset != frame.reg_save_offset)
11949 restore_regs_via_mov = true;
11950 else if (frame_pointer_needed
11951 && TARGET_USE_LEAVE
11952 && cfun->machine->use_fast_prologue_epilogue
11953 && frame.nregs == 1)
11954 restore_regs_via_mov = true;
/* Default: restore via pops.  */
11956 restore_regs_via_mov = false;
11958 if (restore_regs_via_mov || frame.nsseregs)
11960 /* Ensure that the entire register save area is addressable via
11961 the stack pointer, if we will restore via sp. */
11963 && m->fs.sp_offset > 0x7fffffff
11964 && !(m->fs.fp_valid || m->fs.drap_valid)
11965 && (frame.nsseregs + frame.nregs) != 0)
11967 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11968 GEN_INT (m->fs.sp_offset
11969 - frame.sse_reg_save_offset),
11971 m->fs.cfa_reg == stack_pointer_rtx);
11975 /* If there are any SSE registers to restore, then we have to do it
11976 via moves, since there's obviously no pop for SSE regs. */
11977 if (frame.nsseregs)
11978 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11981 if (restore_regs_via_mov)
11986 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11988 /* eh_return epilogues need %ecx added to the stack pointer. */
11991 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11993 /* Stack align doesn't work with eh_return. */
11994 gcc_assert (!stack_realign_drap);
11995 /* Neither does regparm nested functions. */
11996 gcc_assert (!ix86_static_chain_on_stack);
11998 if (frame_pointer_needed)
/* With a frame pointer: fold the FP offset into SA, reload the
   saved FP, and temporarily make SA the CFA register.  */
12000 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
12001 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
12002 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
12004 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
12005 insn = emit_move_insn (hard_frame_pointer_rtx, t);
12007 /* Note that we use SA as a temporary CFA, as the return
12008 address is at the proper place relative to it. We
12009 pretend this happens at the FP restore insn because
12010 prior to this insn the FP would be stored at the wrong
12011 offset relative to SA, and after this insn we have no
12012 other reasonable register to use for the CFA. We don't
12013 bother resetting the CFA to the SP for the duration of
12014 the return insn. */
12015 add_reg_note (insn, REG_CFA_DEF_CFA,
12016 plus_constant (Pmode, sa, UNITS_PER_WORD));
12017 ix86_add_queued_cfa_restore_notes (insn);
12018 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
12019 RTX_FRAME_RELATED_P (insn) = 1;
12021 m->fs.cfa_reg = sa;
12022 m->fs.cfa_offset = UNITS_PER_WORD;
12023 m->fs.fp_valid = false;
12025 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
12026 const0_rtx, style, false);
/* Without a frame pointer: add SA directly into SP.  */
12030 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
12031 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
12032 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
12033 ix86_add_queued_cfa_restore_notes (insn);
12035 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
12036 if (m->fs.cfa_offset != UNITS_PER_WORD)
12038 m->fs.cfa_offset = UNITS_PER_WORD;
12039 add_reg_note (insn, REG_CFA_DEF_CFA,
12040 plus_constant (Pmode, stack_pointer_rtx,
12042 RTX_FRAME_RELATED_P (insn) = 1;
12045 m->fs.sp_offset = UNITS_PER_WORD;
12046 m->fs.sp_valid = true;
12051 /* SEH requires that the function end with (1) a stack adjustment
12052 if necessary, (2) a sequence of pops, and (3) a return or
12053 jump instruction. Prevent insns from the function body from
12054 being scheduled into this sequence. */
12057 /* Prevent a catch region from being adjacent to the standard
12058 epilogue sequence. Unfortuantely crtl->uses_eh_lsda nor
12059 several other flags that would be interesting to test are
12061 if (flag_non_call_exceptions)
12062 emit_insn (gen_nops (const1_rtx));
12064 emit_insn (gen_blockage ());
12067 /* First step is to deallocate the stack frame so that we can
12068 pop the registers. Also do it on SEH target for very large
12069 frame as the emitted instructions aren't allowed by the ABI in
12071 if (!m->fs.sp_valid
12073 && (m->fs.sp_offset - frame.reg_save_offset
12074 >= SEH_MAX_FRAME_SIZE)))
12076 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
12077 GEN_INT (m->fs.fp_offset
12078 - frame.reg_save_offset),
12081 else if (m->fs.sp_offset != frame.reg_save_offset)
12083 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12084 GEN_INT (m->fs.sp_offset
12085 - frame.reg_save_offset),
12087 m->fs.cfa_reg == stack_pointer_rtx);
/* Pop the general registers saved by the prologue.  */
12090 ix86_emit_restore_regs_using_pop ();
12093 /* If we used a stack pointer and haven't already got rid of it,
12095 if (m->fs.fp_valid)
12097 /* If the stack pointer is valid and pointing at the frame
12098 pointer store address, then we only need a pop. */
12099 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12100 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12101 /* Leave results in shorter dependency chains on CPUs that are
12102 able to grok it fast. */
12103 else if (TARGET_USE_LEAVE
12104 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12105 || !cfun->machine->use_fast_prologue_epilogue)
12106 ix86_emit_leave ();
12109 pro_epilogue_adjust_stack (stack_pointer_rtx,
12110 hard_frame_pointer_rtx,
12111 const0_rtx, style, !using_drap);
12112 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
/* DRAP teardown: recover SP from the DRAP register and, if the DRAP
   register was call-saved, pop its saved value.  */
12118 int param_ptr_offset = UNITS_PER_WORD;
12121 gcc_assert (stack_realign_drap);
12123 if (ix86_static_chain_on_stack)
12124 param_ptr_offset += UNITS_PER_WORD;
12125 if (!call_used_regs[REGNO (crtl->drap_reg)])
12126 param_ptr_offset += UNITS_PER_WORD;
12128 insn = emit_insn (gen_rtx_SET
12129 (VOIDmode, stack_pointer_rtx,
12130 gen_rtx_PLUS (Pmode,
12132 GEN_INT (-param_ptr_offset))));
12133 m->fs.cfa_reg = stack_pointer_rtx;
12134 m->fs.cfa_offset = param_ptr_offset;
12135 m->fs.sp_offset = param_ptr_offset;
12136 m->fs.realigned = false;
12138 add_reg_note (insn, REG_CFA_DEF_CFA,
12139 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12140 GEN_INT (param_ptr_offset)));
12141 RTX_FRAME_RELATED_P (insn) = 1;
12143 if (!call_used_regs[REGNO (crtl->drap_reg)])
12144 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12147 /* At this point the stack pointer must be valid, and we must have
12148 restored all of the registers. We may not have deallocated the
12149 entire stack frame. We've delayed this until now because it may
12150 be possible to merge the local stack deallocation with the
12151 deallocation forced by ix86_static_chain_on_stack. */
12152 gcc_assert (m->fs.sp_valid);
12153 gcc_assert (!m->fs.fp_valid);
12154 gcc_assert (!m->fs.realigned);
12155 if (m->fs.sp_offset != UNITS_PER_WORD)
12157 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12158 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12162 ix86_add_queued_cfa_restore_notes (get_last_insn ());
12164 /* Sibcall epilogues don't want a return instruction. */
12167 m->fs = frame_state_save;
/* Emit the actual return.  A "pascal"-style callee-pops return uses
   ret $N, but ret can only pop up to 64K bytes; for larger pops the
   return address is popped into %ecx, SP adjusted, and an indirect
   jump through %ecx performs the return.  */
12171 if (crtl->args.pops_args && crtl->args.size)
12173 rtx popc = GEN_INT (crtl->args.pops_args);
12175 /* i386 can only pop 64K bytes. If asked to pop more, pop return
12176 address, do explicit add, and jump indirectly to the caller. */
12178 if (crtl->args.pops_args >= 65536)
12180 rtx ecx = gen_rtx_REG (SImode, CX_REG);
12183 /* There is no "pascal" calling convention in any 64bit ABI. */
12184 gcc_assert (!TARGET_64BIT);
12186 insn = emit_insn (gen_pop (ecx));
12187 m->fs.cfa_offset -= UNITS_PER_WORD;
12188 m->fs.sp_offset -= UNITS_PER_WORD;
12190 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12191 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
12192 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12193 add_reg_note (insn, REG_CFA_REGISTER,
12194 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
12195 RTX_FRAME_RELATED_P (insn) = 1;
12197 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12199 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12202 emit_jump_insn (gen_simple_return_pop_internal (popc));
12205 emit_jump_insn (gen_simple_return_internal ());
12207 /* Restore the state back to the state from the prologue,
12208 so that it's correct for the next epilogue. */
12209 m->fs = frame_state_save;
12212 /* Reset from the function's potential modifications. */
/* NOTE(review): interior lines (the "static void" line, braces and part
   of the loop/if conditions) are elided in this excerpt.  The Mach-O
   portion below is presumably guarded by TARGET_MACHO in the full file
   -- TODO confirm.  */
12215 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
/* Undo any temporary renumbering of the PIC register done while
   emitting the function body.  */
12217 if (pic_offset_table_rtx
12218 && !ix86_use_pseudo_pic_reg ())
12219 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12221 /* Mach-O doesn't support labels at the end of objects, so if
12222 it looks like we might want one, insert a NOP. */
12224 rtx_insn *insn = get_last_insn ();
12225 rtx_insn *deleted_debug_label = NULL;
/* Walk backwards past trailing notes, remembering any deleted debug
   label encountered on the way.  */
12228 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12230 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
12231 notes only, instead set their CODE_LABEL_NUMBER to -1,
12232 otherwise there would be code generation differences
12233 in between -g and -g0. */
12234 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12235 deleted_debug_label = insn;
12236 insn = PREV_INSN (insn);
12241 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12242 fputs ("\tnop\n", file);
12243 else if (deleted_debug_label)
/* No nop needed: neutralize the deleted debug labels so -g and -g0
   produce identical code.  */
12244 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12245 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12246 CODE_LABEL_NUMBER (insn) = -1;
12252 /* Return a scratch register to use in the split stack prologue. The
12253 split stack prologue is used for -fsplit-stack. It is the first
12254 instructions in the function, even before the regular prologue.
12255 The scratch register can be any caller-saved register which is not
12256 used for parameters or for the static chain. */
/* NOTE(review): several interior lines (the 64-bit fast path, braces,
   some return statements) are elided in this excerpt; the visible code
   handles the 32-bit calling-convention cases.  Returns INVALID_REGNUM
   and issues a "sorry" diagnostic when no register can be spared.  */
12258 static unsigned int
12259 split_stack_prologue_scratch_regno (void)
12265 bool is_fastcall, is_thiscall;
/* Detect the calling convention from the function type's attributes;
   fastcall/thiscall consume registers normally available as scratch.  */
12268 is_fastcall = (lookup_attribute ("fastcall",
12269 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12271 is_thiscall = (lookup_attribute ("thiscall",
12272 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12274 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
/* fastcall: both %ecx and %edx carry parameters, and a static chain
   would need yet another register -- unsupportable.  */
12278 if (DECL_STATIC_CHAIN (cfun->decl))
12280 sorry ("-fsplit-stack does not support fastcall with "
12281 "nested function");
12282 return INVALID_REGNUM;
12286 else if (is_thiscall)
12288 if (!DECL_STATIC_CHAIN (cfun->decl))
12292 else if (regparm < 3)
12294 if (!DECL_STATIC_CHAIN (cfun->decl))
12300 sorry ("-fsplit-stack does not support 2 register "
12301 "parameters for a nested function")
12302 return INVALID_REGNUM;
12309 /* FIXME: We could make this work by pushing a register
12310 around the addition and comparison. */
12311 sorry ("-fsplit-stack does not support 3 register parameters");
12312 return INVALID_REGNUM;
12317 /* A SYMBOL_REF for the function which allocates new stackspace for
/* Cached lazily in ix86_expand_split_stack_prologue; GTY(()) keeps the
   rtx alive across garbage collections.  */
12320 static GTY(()) rtx split_stack_fn;
12322 /* A SYMBOL_REF for the more stack function when using the large
/* Large-code-model variant: "__morestack_large_model", which takes its
   arguments packed in %r10 instead of on the stack/registers.  */
12325 static GTY(()) rtx split_stack_fn_large;
12327 /* Handle -fsplit-stack. These are the first instructions in the
12328 function, even before the regular prologue. */
/* NOTE(review): interior lines (braces, some declarations and else
   branches) are elided in this excerpt; comments describe only the
   visible code.  Overall shape: compare SP (minus frame size) against
   the stack limit in the TCB, branch over the call when there is room,
   otherwise call __morestack to grow the stack.  */
12331 ix86_expand_split_stack_prologue (void)
12333 struct ix86_frame frame;
12334 HOST_WIDE_INT allocate;
12335 unsigned HOST_WIDE_INT args_size;
12336 rtx_code_label *label;
12337 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12338 rtx scratch_reg = NULL_RTX;
12339 rtx_code_label *varargs_label = NULL;
12342 gcc_assert (flag_split_stack && reload_completed);
12344 ix86_finalize_stack_realign_flags ();
12345 ix86_compute_frame_layout (&frame);
12346 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12348 /* This is the label we will branch to if we have enough stack
12349 space. We expect the basic block reordering pass to reverse this
12350 branch if optimizing, so that we branch in the unlikely case. */
12351 label = gen_label_rtx ();
12353 /* We need to compare the stack pointer minus the frame size with
12354 the stack boundary in the TCB. The stack boundary always gives
12355 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12356 can compare directly. Otherwise we need to do an addition. */
/* The TCB stack-limit slot is addressed through an UNSPEC_STACK_CHECK
   memory reference (segment-relative access).  */
12358 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12359 UNSPEC_STACK_CHECK);
12360 limit = gen_rtx_CONST (Pmode, limit);
12361 limit = gen_rtx_MEM (Pmode, limit);
12362 if (allocate < SPLIT_STACK_AVAILABLE)
12363 current = stack_pointer_rtx;
12366 unsigned int scratch_regno;
12369 /* We need a scratch register to hold the stack pointer minus
12370 the required frame size. Since this is the very start of the
12371 function, the scratch register can be any caller-saved
12372 register which is not used for parameters. */
12373 offset = GEN_INT (- allocate);
12374 scratch_regno = split_stack_prologue_scratch_regno ();
12375 if (scratch_regno == INVALID_REGNUM)
12377 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12378 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12380 /* We don't use ix86_gen_add3 in this case because it will
12381 want to split to lea, but when not optimizing the insn
12382 will not be split after this point. */
12383 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12384 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
/* Offset does not fit a 32-bit immediate: load it first, then add.  */
12389 emit_move_insn (scratch_reg, offset);
12390 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12391 stack_pointer_rtx));
12393 current = scratch_reg;
12396 ix86_expand_branch (GEU, current, limit, label);
12397 jump_insn = get_last_insn ();
12398 JUMP_LABEL (jump_insn) = label;
12400 /* Mark the jump as very likely to be taken. */
12401 add_int_reg_note (jump_insn, REG_BR_PROB,
12402 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
/* Lazily create and cache the "__morestack" SYMBOL_REF; marking it
   local avoids going through the PLT.  */
12404 if (split_stack_fn == NULL_RTX)
12406 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12407 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12409 fn = split_stack_fn;
12411 /* Get more stack space. We pass in the desired stack space and the
12412 size of the arguments to copy to the new stack. In 32-bit mode
12413 we push the parameters; __morestack will return on a new stack
12414 anyhow. In 64-bit mode we pass the parameters in r10 and
12416 allocate_rtx = GEN_INT (allocate);
12417 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12418 call_fusage = NULL_RTX;
12423 reg10 = gen_rtx_REG (Pmode, R10_REG);
12424 reg11 = gen_rtx_REG (Pmode, R11_REG);
12426 /* If this function uses a static chain, it will be in %r10.
12427 Preserve it across the call to __morestack. */
12428 if (DECL_STATIC_CHAIN (cfun->decl))
12432 rax = gen_rtx_REG (word_mode, AX_REG);
12433 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12434 use_reg (&call_fusage, rax);
12437 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12440 HOST_WIDE_INT argval;
12442 gcc_assert (Pmode == DImode);
12443 /* When using the large model we need to load the address
12444 into a register, and we've run out of registers. So we
12445 switch to a different calling convention, and we call a
12446 different function: __morestack_large. We pass the
12447 argument size in the upper 32 bits of r10 and pass the
12448 frame size in the lower 32 bits. */
12449 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12450 gcc_assert ((args_size & 0xffffffff) == args_size);
12452 if (split_stack_fn_large == NULL_RTX)
12454 split_stack_fn_large =
12455 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12456 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12458 if (ix86_cmodel == CM_LARGE_PIC)
/* Large PIC model: materialize the GOT base in %r10 and fetch the
   address of __morestack_large_model from the GOT into %r11.  */
12460 rtx_code_label *label;
12463 label = gen_label_rtx ();
12464 emit_label (label);
12465 LABEL_PRESERVE_P (label) = 1;
12466 emit_insn (gen_set_rip_rex64 (reg10, label));
12467 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12468 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12469 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12471 x = gen_rtx_CONST (Pmode, x);
12472 emit_move_insn (reg11, x);
12473 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12474 x = gen_const_mem (Pmode, x);
12475 emit_move_insn (reg11, x);
12478 emit_move_insn (reg11, split_stack_fn_large);
/* Pack args_size in the high 32 bits and the frame size in the low
   32 bits of %r10 (double shift avoids UB when shifting by 32).  */
12482 argval = ((args_size << 16) << 16) + allocate;
12483 emit_move_insn (reg10, GEN_INT (argval));
/* Normal 64-bit convention: frame size in %r10, args size in %r11.  */
12487 emit_move_insn (reg10, allocate_rtx);
12488 emit_move_insn (reg11, GEN_INT (args_size));
12489 use_reg (&call_fusage, reg11);
12492 use_reg (&call_fusage, reg10);
/* 32-bit convention: push both arguments on the stack.  */
12496 emit_insn (gen_push (GEN_INT (args_size)));
12497 emit_insn (gen_push (allocate_rtx));
12499 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12500 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12502 add_function_usage_to (call_insn, call_fusage);
12504 /* In order to make call/return prediction work right, we now need
12505 to execute a return instruction. See
12506 libgcc/config/i386/morestack.S for the details on how this works.
12508 For flow purposes gcc must not see this as a return
12509 instruction--we need control flow to continue at the subsequent
12510 label. Therefore, we use an unspec. */
12511 gcc_assert (crtl->args.pops_args < 65536);
12512 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12514 /* If we are in 64-bit mode and this function uses a static chain,
12515 we saved %r10 in %rax before calling _morestack. */
12516 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12517 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12518 gen_rtx_REG (word_mode, AX_REG));
12520 /* If this function calls va_start, we need to store a pointer to
12521 the arguments on the old stack, because they may not have been
12522 all copied to the new stack. At this point the old stack can be
12523 found at the frame pointer value used by __morestack, because
12524 __morestack has set that up before calling back to us. Here we
12525 store that pointer in a scratch register, and in
12526 ix86_expand_prologue we store the scratch register in a stack
12528 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12530 unsigned int scratch_regno;
12534 scratch_regno = split_stack_prologue_scratch_regno ();
12535 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12536 frame_reg = gen_rtx_REG (Pmode, BP_REG);
/* Stack layout seen from __morestack's frame pointer -- 64-bit:  */
12540 return address within this function
12541 return address of caller of this function
12543 So we add three words to get to the stack arguments.
/* 32-bit layout additionally contains the two pushed arguments:  */
12547 return address within this function
12548 first argument to __morestack
12549 second argument to __morestack
12550 return address of caller of this function
12552 So we add five words to get to the stack arguments.
12554 words = TARGET_64BIT ? 3 : 5;
12555 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12556 gen_rtx_PLUS (Pmode, frame_reg,
12557 GEN_INT (words * UNITS_PER_WORD))));
12559 varargs_label = gen_label_rtx ();
12560 emit_jump_insn (gen_jump (varargs_label));
12561 JUMP_LABEL (get_last_insn ()) = varargs_label;
/* Fall-through target when enough stack was available.  */
12566 emit_label (label);
12567 LABEL_NUSES (label) = 1;
12569 /* If this function calls va_start, we now have to set the scratch
12570 register for the case where we do not call __morestack. In this
12571 case we need to set it based on the stack pointer. */
12572 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12574 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12575 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12576 GEN_INT (UNITS_PER_WORD))));
12578 emit_label (varargs_label);
12579 LABEL_NUSES (varargs_label) = 1;
12583 /* We may have to tell the dataflow pass that the split stack prologue
12584 is initializing a scratch register. */
/* TARGET_EXTRA_LIVE_ON_ENTRY hook: when -fsplit-stack needs a varargs
   pointer, the scratch register is written before the function "body"
   proper, so mark it live on entry for dataflow.  */
12587 ix86_live_on_entry (bitmap regs)
12589 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12591 gcc_assert (flag_split_stack);
12592 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12596 /* Extract the parts of an RTL expression that is a valid memory address
12597 for an instruction. Return 0 if the structure of the address is
12598 grossly off. Return -1 if the address contains ASHIFT, so it is not
12599 strictly valid, but still used for computing length of lea instruction. */
/* NOTE(review): numerous interior lines (braces, "return 0" failure
   paths, parts of the PLUS loop) are elided in this excerpt.  The
   function fills *OUT with base/index/scale/disp/seg components.  */
12602 ix86_decompose_address (rtx addr, struct ix86_address *out)
12604 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12605 rtx base_reg, index_reg;
12606 HOST_WIDE_INT scale = 1;
12607 rtx scale_rtx = NULL_RTX;
12610 enum ix86_address_seg seg = SEG_DEFAULT;
12612 /* Allow zero-extended SImode addresses,
12613 they will be emitted with addr32 prefix. */
12614 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12616 if (GET_CODE (addr) == ZERO_EXTEND
12617 && GET_MODE (XEXP (addr, 0)) == SImode)
12619 addr = XEXP (addr, 0);
12620 if (CONST_INT_P (addr))
/* (and:DI x 0xffffffff) is another zero-extension spelling.  */
12623 else if (GET_CODE (addr) == AND
12624 && const_32bit_mask (XEXP (addr, 1), DImode))
12626 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12627 if (addr == NULL_RTX)
12630 if (CONST_INT_P (addr))
12635 /* Allow SImode subregs of DImode addresses,
12636 they will be emitted with addr32 prefix. */
12637 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12639 if (GET_CODE (addr) == SUBREG
12640 && GET_MODE (SUBREG_REG (addr)) == DImode)
12642 addr = SUBREG_REG (addr);
12643 if (CONST_INT_P (addr))
12650 else if (GET_CODE (addr) == SUBREG)
12652 if (REG_P (SUBREG_REG (addr)))
/* PLUS: flatten the addend tree into at most four operands, then
   classify each as base, index*scale, segment unspec or disp.  */
12657 else if (GET_CODE (addr) == PLUS)
12659 rtx addends[4], op;
12667 addends[n++] = XEXP (op, 1);
12670 while (GET_CODE (op) == PLUS);
12675 for (i = n; i >= 0; --i)
12678 switch (GET_CODE (op))
/* index * scale as an explicit MULT.  */
12683 index = XEXP (op, 0);
12684 scale_rtx = XEXP (op, 1);
/* index << shift; only shifts of 0..3 map to scales 1/2/4/8.  */
12690 index = XEXP (op, 0);
12691 tmp = XEXP (op, 1);
12692 if (!CONST_INT_P (tmp))
12694 scale = INTVAL (tmp);
12695 if ((unsigned HOST_WIDE_INT) scale > 3)
12697 scale = 1 << scale;
12702 if (GET_CODE (op) != UNSPEC)
/* Thread-pointer unspec selects the TLS segment register.  */
12707 if (XINT (op, 1) == UNSPEC_TP
12708 && TARGET_TLS_DIRECT_SEG_REFS
12709 && seg == SEG_DEFAULT)
12710 seg = DEFAULT_TLS_SEG_REG;
12716 if (!REG_P (SUBREG_REG (op)))
12743 else if (GET_CODE (addr) == MULT)
12745 index = XEXP (addr, 0); /* index*scale */
12746 scale_rtx = XEXP (addr, 1);
12748 else if (GET_CODE (addr) == ASHIFT)
12750 /* We're called for lea too, which implements ashift on occasion. */
12751 index = XEXP (addr, 0);
12752 tmp = XEXP (addr, 1);
12753 if (!CONST_INT_P (tmp))
12755 scale = INTVAL (tmp);
12756 if ((unsigned HOST_WIDE_INT) scale > 3)
12758 scale = 1 << scale;
/* This ASHIFT form is the -1 ("lea length only") case.  */
12762 disp = addr; /* displacement */
12768 else if (GET_CODE (index) == SUBREG
12769 && REG_P (SUBREG_REG (index)))
12775 /* Extract the integral value of scale. */
12778 if (!CONST_INT_P (scale_rtx))
12780 scale = INTVAL (scale_rtx);
/* Look through SUBREGs so the register checks below see hard regs.  */
12783 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12784 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12786 /* Avoid useless 0 displacement. */
12787 if (disp == const0_rtx && (base || index))
12790 /* Allow arg pointer and stack pointer as index if there is not scaling. */
/* %esp cannot be an index in the encoding; swap base/index when legal.  */
12791 if (base_reg && index_reg && scale == 1
12792 && (index_reg == arg_pointer_rtx
12793 || index_reg == frame_pointer_rtx
12794 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12796 std::swap (base, index);
12797 std::swap (base_reg, index_reg);
12800 /* Special case: %ebp cannot be encoded as a base without a displacement.
12804 && (base_reg == hard_frame_pointer_rtx
12805 || base_reg == frame_pointer_rtx
12806 || base_reg == arg_pointer_rtx
12807 || (REG_P (base_reg)
12808 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12809 || REGNO (base_reg) == R13_REG))))
12812 /* Special case: on K6, [%esi] makes the instruction vector decoded.
12813 Avoid this by transforming to [%esi+0].
12814 Reload calls address legitimization without cfun defined, so we need
12815 to test cfun for being non-NULL. */
12816 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12817 && base_reg && !index_reg && !disp
12818 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12821 /* Special case: encode reg+reg instead of reg*2. */
12822 if (!base && index && scale == 2)
12823 base = index, base_reg = index_reg, scale = 1;
12825 /* Special case: scaling cannot be encoded without base or displacement. */
12826 if (!base && !disp && index && scale != 1)
/* Fill the output structure for the caller.  */
12830 out->index = index;
12832 out->scale = scale;
12838 /* Return cost of the memory address x.
12839 For i386, it is better to use a complex address than let gcc copy
12840 the address into a reg and make a new pseudo. But not if the address
12841 requires to two regs - that would mean more pseudos with longer
/* NOTE(review): interior lines (the "static int" line, some condition
   heads and the return statements) are elided in this excerpt.
   Implements the TARGET_ADDRESS_COST hook.  */
12844 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
12846 struct ix86_address parts;
12848 int ok = ix86_decompose_address (x, &parts);
/* Strip SUBREGs so the REGNO checks below see the inner registers.  */
12852 if (parts.base && GET_CODE (parts.base) == SUBREG)
12853 parts.base = SUBREG_REG (parts.base);
12854 if (parts.index && GET_CODE (parts.index) == SUBREG)
12855 parts.index = SUBREG_REG (parts.index);
12857 /* Attempt to minimize number of registers in the address. */
12859 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12861 && (!REG_P (parts.index)
12862 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12865 /* When address base or index is "pic_offset_table_rtx" we don't increase
12866 address cost. When a memopt with "pic_offset_table_rtx" is not invariant
12867 itself it most likely means that base or index is not invariant.
12868 Therefore only "pic_offset_table_rtx" could be hoisted out, which is not
12869 profitable for x86. */
12871 && (current_pass->type == GIMPLE_PASS
12872 || (!pic_offset_table_rtx
12873 || REGNO (pic_offset_table_rtx) != REGNO(parts.base)))
12874 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12876 && (current_pass->type == GIMPLE_PASS
12877 || (!pic_offset_table_rtx
12878 || REGNO (pic_offset_table_rtx) != REGNO(parts.index)))
12879 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12880 && parts.base != parts.index)
12883 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
12884 since it's predecode logic can't detect the length of instructions
12885 and it degenerates to vector decoded. Increase cost of such
12886 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
12887 to split such addresses or even refuse such addresses at all.
12889 Following addressing modes are affected:
12894 The first and last case may be avoidable by explicitly coding the zero in
12895 memory address, but I don't have AMD-K6 machine handy to check this
12899 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12900 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12901 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12907 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12908 this is used for to form addresses to local data when -fPIC is in
/* Predicate: true iff DISP is the Mach-O pic-base-offset unspec.  */
12912 darwin_local_data_pic (rtx disp)
12914 return (GET_CODE (disp) == UNSPEC
12915 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12918 /* Determine if a given RTX is a valid constant. We already know this
12919 satisfies CONSTANT_P. */
/* NOTE(review): interior lines (case labels, braces, several returns)
   are elided in this excerpt.  Implements TARGET_LEGITIMATE_CONSTANT_P.  */
12922 ix86_legitimate_constant_p (machine_mode, rtx x)
12924 /* Pointer bounds constants are not valid. */
12925 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
12928 switch (GET_CODE (x))
/* CONST: peel off an outer (plus sym const_int) wrapper first.  */
12933 if (GET_CODE (x) == PLUS)
12935 if (!CONST_INT_P (XEXP (x, 1)))
12940 if (TARGET_MACHO && darwin_local_data_pic (x))
12943 /* Only some unspecs are valid as "constants". */
12944 if (GET_CODE (x) == UNSPEC)
12945 switch (XINT (x, 1))
12948 case UNSPEC_GOTOFF:
12949 case UNSPEC_PLTOFF:
12950 return TARGET_64BIT;
/* TLS offset unspecs are valid only for the matching TLS model.  */
12952 case UNSPEC_NTPOFF:
12953 x = XVECEXP (x, 0, 0);
12954 return (GET_CODE (x) == SYMBOL_REF
12955 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12956 case UNSPEC_DTPOFF:
12957 x = XVECEXP (x, 0, 0);
12958 return (GET_CODE (x) == SYMBOL_REF
12959 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12964 /* We must have drilled down to a symbol. */
12965 if (GET_CODE (x) == LABEL_REF)
12967 if (GET_CODE (x) != SYMBOL_REF)
12972 /* TLS symbols are never valid. */
12973 if (SYMBOL_REF_TLS_MODEL (x))
12976 /* DLLIMPORT symbols are never valid. */
12977 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12978 && SYMBOL_REF_DLLIMPORT_P (x))
12982 /* mdynamic-no-pic */
12983 if (MACHO_DYNAMIC_NO_PIC_P)
12984 return machopic_symbol_defined_p (x);
/* Wide integer / vector constants: only standard SSE constants.  */
12989 if (GET_MODE (x) == TImode
12990 && x != CONST0_RTX (TImode)
12996 if (!standard_sse_constant_p (x))
13003 /* Otherwise we handle everything else in the move patterns. */
13007 /* Determine if it's legal to put X into the constant pool. This
13008 is not possible for the address of thread-local symbols, which
13009 is checked above. */
/* Implements TARGET_CANNOT_FORCE_CONST_MEM: anything that is not a
   legitimate constant must stay out of the constant pool.  Some case
   labels are elided in this excerpt.  */
13012 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
13014 /* We can always put integral constants and vectors in memory. */
13015 switch (GET_CODE (x))
13025 return !ix86_legitimate_constant_p (mode, x);
13028 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
/* Predicate used by the PE/COFF legitimization paths; returns false
   unless dllimport decl attributes are in effect and X is a symbol.  */
13032 is_imported_p (rtx x)
13034 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
13035 || GET_CODE (x) != SYMBOL_REF)
13038 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
13042 /* Nonzero if the constant value X is a legitimate general operand
13043 when generating PIC code. It is given that flag_pic is on and
13044 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
/* NOTE(review): interior lines (case labels, defaults, braces) are
   elided in this excerpt.  */
13047 legitimate_pic_operand_p (rtx x)
13051 switch (GET_CODE (x))
/* CONST: strip a (plus inner const_int) wrapper before inspecting.  */
13054 inner = XEXP (x, 0);
13055 if (GET_CODE (inner) == PLUS
13056 && CONST_INT_P (XEXP (inner, 1)))
13057 inner = XEXP (inner, 0);
13059 /* Only some unspecs are valid as "constants". */
13060 if (GET_CODE (inner) == UNSPEC)
13061 switch (XINT (inner, 1))
13064 case UNSPEC_GOTOFF:
13065 case UNSPEC_PLTOFF:
13066 return TARGET_64BIT;
/* TLS local-exec offset: valid only for local-exec symbols.  */
13068 x = XVECEXP (inner, 0, 0);
13069 return (GET_CODE (x) == SYMBOL_REF
13070 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13071 case UNSPEC_MACHOPIC_OFFSET:
13072 return legitimate_pic_address_disp_p (x);
/* Symbols/labels defer to the displacement legitimacy check.  */
13080 return legitimate_pic_address_disp_p (x);
13087 /* Determine if a given CONST RTX is a valid memory displacement
/* Return nonzero if DISP is a valid memory displacement under PIC.
   First half: 64-bit direct symbol/label addressing (no GOT) for
   non-dynamic symbols; second half: recognize the GOT/TLS UNSPECs.
   NOTE(review): many lines (returns, braces, some case labels) are
   elided in this excerpt, so control flow between the visible tests
   cannot be fully reconstructed here.  */
13091 legitimate_pic_address_disp_p (rtx disp)
13095 /* In 64bit mode we can allow direct addresses of symbols and labels
13096 when they are not dynamic symbols. */
13099 rtx op0 = disp, op1;
13101 switch (GET_CODE (disp))
/* CONST case: expect (const (plus sym offset)); reject offsets with
   magnitude >= 16 MB (the +-16*1024*1024 window checked below).  */
13107 if (GET_CODE (XEXP (disp, 0)) != PLUS)
13109 op0 = XEXP (XEXP (disp, 0), 0);
13110 op1 = XEXP (XEXP (disp, 0), 1);
13111 if (!CONST_INT_P (op1)
13112 || INTVAL (op1) >= 16*1024*1024
13113 || INTVAL (op1) < -16*1024*1024)
13115 if (GET_CODE (op0) == LABEL_REF)
13117 if (GET_CODE (op0) == CONST
13118 && GET_CODE (XEXP (op0, 0)) == UNSPEC
13119 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13121 if (GET_CODE (op0) == UNSPEC
13122 && XINT (op0, 1) == UNSPEC_PCREL)
13124 if (GET_CODE (op0) != SYMBOL_REF)
13129 /* TLS references should always be enclosed in UNSPEC.
13130 The dllimported symbol needs always to be resolved. */
13131 if (SYMBOL_REF_TLS_MODEL (op0)
13132 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
13137 if (is_imported_p (op0))
13140 if (SYMBOL_REF_FAR_ADDR_P (op0)
13141 || !SYMBOL_REF_LOCAL_P (op0))
13144 /* Function-symbols need to be resolved only for
13146 For the small-model we don't need to resolve anything
13148 if ((ix86_cmodel != CM_LARGE_PIC
13149 && SYMBOL_REF_FUNCTION_P (op0))
13150 || ix86_cmodel == CM_SMALL_PIC)
13152 /* Non-external symbols don't need to be resolved for
13153 large, and medium-model. */
13154 if ((ix86_cmodel == CM_LARGE_PIC
13155 || ix86_cmodel == CM_MEDIUM_PIC)
13156 && !SYMBOL_REF_EXTERNAL_P (op0))
/* Plain SYMBOL_REF case: allow near, local (or PIE copy-reloc'able,
   non-weak, non-function) symbols outside the large model.  */
13159 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13160 && (SYMBOL_REF_LOCAL_P (op0)
13161 || (HAVE_LD_PIE_COPYRELOC
13163 && !SYMBOL_REF_WEAK (op0)
13164 && !SYMBOL_REF_FUNCTION_P (op0)))
13165 && ix86_cmodel != CM_LARGE_PIC)
/* 64-bit tail: only a bare UNSPEC wrapping a symbol/label survives.  */
13173 if (GET_CODE (disp) != CONST)
13175 disp = XEXP (disp, 0);
13179 /* We are unsafe to allow PLUS expressions. This limit allowed distance
13180 of GOT tables. We should not need these anyway. */
13181 if (GET_CODE (disp) != UNSPEC
13182 || (XINT (disp, 1) != UNSPEC_GOTPCREL
13183 && XINT (disp, 1) != UNSPEC_GOTOFF
13184 && XINT (disp, 1) != UNSPEC_PCREL
13185 && XINT (disp, 1) != UNSPEC_PLTOFF))
13188 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13189 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit path: strip (const (plus unspec const_int)) then dispatch on
   the UNSPEC kind.  */
13195 if (GET_CODE (disp) == PLUS)
13197 if (!CONST_INT_P (XEXP (disp, 1)))
13199 disp = XEXP (disp, 0);
13203 if (TARGET_MACHO && darwin_local_data_pic (disp))
13206 if (GET_CODE (disp) != UNSPEC)
13209 switch (XINT (disp, 1))
13214 /* We need to check for both symbols and labels because VxWorks loads
13215 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
13217 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13218 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13219 case UNSPEC_GOTOFF:
13220 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
13221 While ABI specify also 32bit relocation but we don't produce it in
13222 small PIC model at all. */
13223 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13224 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13226 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
/* TLS unspecs: each is valid only for the matching TLS model of the
   wrapped SYMBOL_REF.  */
13228 case UNSPEC_GOTTPOFF:
13229 case UNSPEC_GOTNTPOFF:
13230 case UNSPEC_INDNTPOFF:
13233 disp = XVECEXP (disp, 0, 0);
13234 return (GET_CODE (disp) == SYMBOL_REF
13235 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13236 case UNSPEC_NTPOFF:
13237 disp = XVECEXP (disp, 0, 0);
13238 return (GET_CODE (disp) == SYMBOL_REF
13239 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13240 case UNSPEC_DTPOFF:
13241 disp = XVECEXP (disp, 0, 0);
13242 return (GET_CODE (disp) == SYMBOL_REF
13243 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13249 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
13250 replace the input X, or the original X if no replacement is called for.
13251 The output parameter *WIN is 1 if the calling macro should goto WIN,
13252 0 if it should not. */
/* Implement LEGITIMIZE_RELOAD_ADDRESS (see comment above): for the
   (plus (plus ... reg) reg) shape reload produces around UNSPEC_TP,
   reload only the sub-operands that fail the strict base/index checks
   instead of reloading the whole address.
   NOTE(review): declarations of `base'/`index', some braces, and the
   *WIN handling are elided in this excerpt.  */
13255 ix86_legitimize_reload_address (rtx x, machine_mode, int opnum, int type,
13258 /* Reload can generate:
13260 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
13264 This RTX is rejected from ix86_legitimate_address_p due to
13265 non-strictness of base register 97. Following this rejection,
13266 reload pushes all three components into separate registers,
13267 creating invalid memory address RTX.
13269 Following code reloads only the invalid part of the
13270 memory address RTX. */
13272 if (GET_CODE (x) == PLUS
13273 && REG_P (XEXP (x, 1))
13274 && GET_CODE (XEXP (x, 0)) == PLUS
13275 && REG_P (XEXP (XEXP (x, 0), 1)))
13278 bool something_reloaded = false;
/* Inner PLUS operand 1 acts as the base register.  */
13280 base = XEXP (XEXP (x, 0), 1);
13281 if (!REG_OK_FOR_BASE_STRICT_P (base))
13283 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
13284 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13285 opnum, (enum reload_type) type);
13286 something_reloaded = true;
/* Outer PLUS operand 1 acts as the index register.  */
13289 index = XEXP (x, 1);
13290 if (!REG_OK_FOR_INDEX_STRICT_P (index))
13292 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
13293 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13294 opnum, (enum reload_type) type);
13295 something_reloaded = true;
/* This path is only taken when at least one operand was invalid.  */
13298 gcc_assert (something_reloaded);
13305 /* Determine if op is suitable RTX for an address register.
13306 Return naked register if a register or a register subreg is
13307 found, otherwise return NULL_RTX. */
/* Return the naked register behind OP if OP is a register or a
   register SUBREG usable in an address; otherwise NULL_RTX (per the
   comment above).  NOTE(review): the REG_P branch and return
   statements are elided in this excerpt.  */
13310 ix86_validate_address_register (rtx op)
13312 machine_mode mode = GET_MODE (op);
13314 /* Only SImode or DImode registers can form the address. */
13315 if (mode != SImode && mode != DImode)
13320 else if (GET_CODE (op) == SUBREG)
13322 rtx reg = SUBREG_REG (op);
/* Re-check the inner register's mode, not the SUBREG's.  */
13327 mode = GET_MODE (reg);
13329 /* Don't allow SUBREGs that span more than a word. It can
13330 lead to spill failures when the register is one word out
13331 of a two word structure. */
13332 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13335 /* Allow only SUBREGs of non-eliminable hard registers. */
13336 if (register_no_elim_operand (reg, mode))
13340 /* Op is not a register. */
13344 /* Recognizes RTL expressions that are valid memory addresses for an
13345 instruction. The MODE argument is the machine mode for the MEM
13346 expression that wants to use this address.
13348 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
13349 convert common non-canonical forms to canonical form so that they will
/* Implement TARGET_LEGITIMATE_ADDRESS_P: decompose ADDR into
   base/index/disp/scale/segment and validate each part.  STRICT
   selects strict vs. non-strict register checks.
   NOTE(review): numerous lines (returns, braces, a few case labels)
   are elided in this excerpt.  */
13353 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13355 struct ix86_address parts;
13356 rtx base, index, disp;
13357 HOST_WIDE_INT scale;
13358 enum ix86_address_seg seg;
13360 if (ix86_decompose_address (addr, &parts) <= 0)
13361 /* Decomposition failed. */
13365 index = parts.index;
13367 scale = parts.scale;
13370 /* Validate base register. */
13373 rtx reg = ix86_validate_address_register (base);
13375 if (reg == NULL_RTX)
13378 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13379 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13380 /* Base is not valid. */
13384 /* Validate index register. */
13387 rtx reg = ix86_validate_address_register (index)
13389 if (reg == NULL_RTX)
13392 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13393 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13394 /* Index is not valid. */
13398 /* Index and base should have the same mode. */
13400 && GET_MODE (base) != GET_MODE (index)
13403 /* Address override works only on the (%reg) part of %fs:(%reg). */
13404 if (seg != SEG_DEFAULT
13405 && ((base && GET_MODE (base) != word_mode)
13406 || (index && GET_MODE (index) != word_mode)))
13409 /* Validate scale factor. */
13413 /* Scale without index. */
13416 if (scale != 2 && scale != 4 && scale != 8)
13417 /* Scale is not a valid multiplier. */
13421 /* Validate displacement. */
13424 if (GET_CODE (disp) == CONST
13425 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13426 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13427 switch (XINT (XEXP (disp, 0), 1))
13429 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
13430 used. While ABI specify also 32bit relocations, we don't produce
13431 them at all and use IP relative instead. */
13433 case UNSPEC_GOTOFF:
13434 gcc_assert (flag_pic);
13436 goto is_legitimate_pic;
13438 /* 64bit address unspec. */
13441 case UNSPEC_GOTPCREL:
13443 gcc_assert (flag_pic);
13444 goto is_legitimate_pic;
/* TLS unspecs are accepted here; full validation happens in
   legitimate_pic_address_disp_p.  */
13446 case UNSPEC_GOTTPOFF:
13447 case UNSPEC_GOTNTPOFF:
13448 case UNSPEC_INDNTPOFF:
13449 case UNSPEC_NTPOFF:
13450 case UNSPEC_DTPOFF:
13453 case UNSPEC_STACK_CHECK:
13454 gcc_assert (flag_split_stack);
13458 /* Invalid address unspec. */
/* PIC displacements (the is_legitimate_pic label is elided here).  */
13462 else if (SYMBOLIC_CONST (disp)
13466 && MACHOPIC_INDIRECT
13467 && !machopic_operand_p (disp)
13473 if (TARGET_64BIT && (index || base))
13475 /* foo@dtpoff(%rX) is ok. */
13476 if (GET_CODE (disp) != CONST
13477 || GET_CODE (XEXP (disp, 0)) != PLUS
13478 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13479 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13480 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13481 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13482 /* Non-constant pic memory reference. */
13485 else if ((!TARGET_MACHO || flag_pic)
13486 && ! legitimate_pic_address_disp_p (disp))
13487 /* Displacement is an invalid pic construct. */
13490 else if (MACHO_DYNAMIC_NO_PIC_P
13491 && !ix86_legitimate_constant_p (Pmode, disp))
13492 /* displacment must be referenced via non_lazy_pointer */
13496 /* This code used to verify that a symbolic pic displacement
13497 includes the pic_offset_table_rtx register.
13499 While this is good idea, unfortunately these constructs may
13500 be created by "adds using lea" optimization for incorrect
13509 This code is nonsensical, but results in addressing
13510 GOT table with pic_offset_table_rtx base. We can't
13511 just refuse it easily, since it gets matched by
13512 "addsi3" pattern, that later gets split to lea in the
13513 case output register differs from input. While this
13514 can be handled by separate addsi pattern for this case
13515 that never results in lea, this seems to be easier and
13516 correct fix for crash to disable this test. */
13518 else if (GET_CODE (disp) != LABEL_REF
13519 && !CONST_INT_P (disp)
13520 && (GET_CODE (disp) != CONST
13521 || !ix86_legitimate_constant_p (Pmode, disp))
13522 && (GET_CODE (disp) != SYMBOL_REF
13523 || !ix86_legitimate_constant_p (Pmode, disp)))
13524 /* Displacement is not constant. */
13526 else if (TARGET_64BIT
13527 && !x86_64_immediate_operand (disp, VOIDmode))
13528 /* Displacement is out of range. */
13530 /* In x32 mode, constant addresses are sign extended to 64bit, so
13531 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13532 else if (TARGET_X32 && !(index || base)
13533 && CONST_INT_P (disp)
13534 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13538 /* Everything looks valid. */
13542 /* Determine if a given RTX is a valid constant address. */
/* Return true iff X is both a constant and a strictly legitimate
   Pmode address (strict check: third argument is 1).  */
13545 constant_address_p (rtx x)
13547 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13550 /* Return a unique alias set for the GOT. */
/* Return the (lazily created, cached) alias set used for all GOT
   accesses.  NOTE(review): the `if (set == -1)' guard and return are
   elided in this excerpt.  */
static alias_set_type
13553 ix86_GOT_alias_set (void)
13555 static alias_set_type set = -1;
13557 set = new_alias_set ();
13561 /* Set regs_ever_live for PIC base address register
13562 to true if required. */
/* Mark the PIC base register as ever-live in the DF framework, but
   only while reload is in progress (per the comment above).  */
13564 set_pic_reg_ever_live ()
13566 if (reload_in_progress)
13567 df_set_regs_ever_live (REGNO (pic_offset_table_rtx), true);
13570 /* Return a legitimate reference for ORIG (an address) using the
13571 register REG. If REG is 0, a new pseudo is generated.
13573 There are two types of references that must be handled:
13575 1. Global data references must load the address from the GOT, via
13576 the PIC reg. An insn is emitted to do this load, and the reg is
13579 2. Static data references, constant pool addresses, and code labels
13580 compute the address as an offset from the GOT, whose base is in
13581 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13582 differentiate them from global data objects. The returned
13583 address is the PIC reg + an unspec constant.
13585 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13586 reg also appears in the address. */
/* Return a legitimate PIC reference for ORIG using register REG (a new
   pseudo when REG is 0) -- see the large comment above for the two
   reference classes (@GOT load vs. @GOTOFF offset).
   NOTE(review): many lines (returns, braces, else branches) are elided
   in this excerpt; the control-flow glue between the visible cases is
   not fully reconstructible here.  */
13589 legitimize_pic_address (rtx orig, rtx reg)
13592 rtx new_rtx = orig;
/* Darwin 32-bit: delegate entirely to the Mach-O machinery.  */
13595 if (TARGET_MACHO && !TARGET_64BIT)
13598 reg = gen_reg_rtx (Pmode);
13599 /* Use the generic Mach-O PIC machinery. */
13600 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
/* PE-COFF dllimport symbols get their own indirection first.  */
13604 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13606 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13611 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
/* 64-bit medium/large PIC @GOTOFF: materialize the offset in a
   temporary and add the PIC register.  */
13613 else if (TARGET_64BIT && !TARGET_PECOFF
13614 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13617 /* This symbol may be referenced via a displacement from the PIC
13618 base address (@GOTOFF). */
13620 set_pic_reg_ever_live ();
13621 if (GET_CODE (addr) == CONST)
13622 addr = XEXP (addr, 0);
13623 if (GET_CODE (addr) == PLUS)
13625 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13627 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13630 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13631 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13633 tmpreg = gen_reg_rtx (Pmode);
13636 emit_move_insn (tmpreg, new_rtx);
13640 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13641 tmpreg, 1, OPTAB_DIRECT);
13645 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
/* 32-bit @GOTOFF: pic_reg + unspec constant, no extra temporary.  */
13647 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13649 /* This symbol may be referenced via a displacement from the PIC
13650 base address (@GOTOFF). */
13652 set_pic_reg_ever_live ();
13653 if (GET_CODE (addr) == CONST)
13654 addr = XEXP (addr, 0);
13655 if (GET_CODE (addr) == PLUS)
13657 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13659 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13662 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13663 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13664 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13668 emit_move_insn (reg, new_rtx);
/* Global-data references: load the address via the GOT.  */
13672 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13673 /* We can't use @GOTOFF for text labels on VxWorks;
13674 see gotoff_operand. */
13675 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13677 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13681 /* For x64 PE-COFF there is no GOT table. So we use address
13683 if (TARGET_64BIT && TARGET_PECOFF)
13685 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13686 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13689 reg = gen_reg_rtx (Pmode);
13690 emit_move_insn (reg, new_rtx);
/* 64-bit small/medium PIC: RIP-relative @GOTPCREL load.  */
13693 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13695 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13696 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13697 new_rtx = gen_const_mem (Pmode, new_rtx);
13698 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13701 reg = gen_reg_rtx (Pmode);
13702 /* Use directly gen_movsi, otherwise the address is loaded
13703 into register for CSE. We don't want to CSE this addresses,
13704 instead we CSE addresses from the GOT table, so skip this. */
13705 emit_insn (gen_movsi (reg, new_rtx));
13710 /* This symbol must be referenced via a load from the
13711 Global Offset Table (@GOT). */
13713 set_pic_reg_ever_live ();
13714 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13715 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13717 new_rtx = force_reg (Pmode, new_rtx);
13718 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13719 new_rtx = gen_const_mem (Pmode, new_rtx);
13720 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13723 reg = gen_reg_rtx (Pmode);
13724 emit_move_insn (reg, new_rtx);
/* Non-symbolic fallbacks: constants, CONST wrappers, and PLUS.  */
13730 if (CONST_INT_P (addr)
13731 && !x86_64_immediate_operand (addr, VOIDmode))
13735 emit_move_insn (reg, addr);
13739 new_rtx = force_reg (Pmode, addr);
13741 else if (GET_CODE (addr) == CONST)
13743 addr = XEXP (addr, 0);
13745 /* We must match stuff we generate before. Assume the only
13746 unspecs that can get here are ours. Not that we could do
13747 anything with them anyway.... */
13748 if (GET_CODE (addr) == UNSPEC
13749 || (GET_CODE (addr) == PLUS
13750 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13752 gcc_assert (GET_CODE (addr) == PLUS);
13754 if (GET_CODE (addr) == PLUS)
13756 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13758 /* Check first to see if this is a constant offset from a @GOTOFF
13759 symbol reference. */
13760 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13761 && CONST_INT_P (op1))
13765 set_pic_reg_ever_live ();
13766 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13768 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13769 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13770 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13774 emit_move_insn (reg, new_rtx);
/* Offsets outside +-16 MB cannot be folded into the relocation;
   force them into registers instead.  */
13780 if (INTVAL (op1) < -16*1024*1024
13781 || INTVAL (op1) >= 16*1024*1024)
13783 if (!x86_64_immediate_operand (op1, Pmode))
13784 op1 = force_reg (Pmode, op1);
13785 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize both operands recursively, then recombine,
   keeping any constant term outermost.  */
13791 rtx base = legitimize_pic_address (op0, reg);
13792 machine_mode mode = GET_MODE (base);
13794 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13796 if (CONST_INT_P (new_rtx))
13798 if (INTVAL (new_rtx) < -16*1024*1024
13799 || INTVAL (new_rtx) >= 16*1024*1024)
13801 if (!x86_64_immediate_operand (new_rtx, mode))
13802 new_rtx = force_reg (mode, new_rtx);
13804 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13807 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13811 if (GET_CODE (new_rtx) == PLUS
13812 && CONSTANT_P (XEXP (new_rtx, 1)))
13814 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13815 new_rtx = XEXP (new_rtx, 1);
13817 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13825 /* Load the thread pointer. If TO_REG is true, force it into a register. */
/* Build the UNSPEC_TP thread-pointer rtx in TP_MODE, zero-extending
   the SImode form to DImode when needed (x32); copy it to a register
   when TO_REG (per the comment above).  */
13828 get_thread_pointer (machine_mode tp_mode, bool to_reg)
13830 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13832 if (GET_MODE (tp) != tp_mode)
/* Only the SImode -> DImode widening is ever expected here.  */
13834 gcc_assert (GET_MODE (tp) == SImode);
13835 gcc_assert (tp_mode == DImode);
13837 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13841 tp = copy_to_mode_reg (tp_mode, tp);
13846 /* Construct the SYMBOL_REF for the tls_get_addr function. */
/* Cached SYMBOL_REF for the TLS resolver function (GC-rooted).  */
static GTY(()) rtx ix86_tls_symbol;
/* Return the (lazily created) SYMBOL_REF for __tls_get_addr; GNU TLS
   on 32-bit uses the triple-underscore variant.  For large-model PIC
   (non-PECOFF), wrap it as pic_reg + @PLTOFF-style unspec.  */
13851 ix86_tls_get_addr (void)
13853 if (!ix86_tls_symbol)
13856 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13857 ? "___tls_get_addr" : "__tls_get_addr");
13859 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13862 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13864 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13866 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13867 gen_rtx_CONST (Pmode, unspec));
13870 return ix86_tls_symbol;
13873 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
/* Cached SYMBOL_REF for _TLS_MODULE_BASE_ (GC-rooted).  */
static GTY(()) rtx ix86_tls_module_base_symbol;
/* Return the (lazily created) _TLS_MODULE_BASE_ SYMBOL_REF, tagged
   with the global-dynamic TLS model so later checks treat it as a
   TLS symbol.  */
13878 ix86_tls_module_base (void)
13880 if (!ix86_tls_module_base_symbol)
13882 ix86_tls_module_base_symbol
13883 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13885 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13886 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13889 return ix86_tls_module_base_symbol;
13892 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13893 false if we expect this to be used for a memory address and true if
13894 we expect to load the address into a register. */
/* Legitimize TLS address X under MODEL (global-dynamic,
   local-dynamic, initial-exec, local-exec); FOR_MOV as documented
   above.  NOTE(review): a number of lines (braces, some else arms,
   returns) are elided in this excerpt.  */
13897 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13899 rtx dest, base, off;
13900 rtx pic = NULL_RTX, tp = NULL_RTX;
13901 machine_mode tp_mode = Pmode;
13904 /* Fall back to global dynamic model if tool chain cannot support local
13906 if (TARGET_SUN_TLS && !TARGET_64BIT
13907 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13908 && model == TLS_MODEL_LOCAL_DYNAMIC)
13909 model = TLS_MODEL_GLOBAL_DYNAMIC;
13913 case TLS_MODEL_GLOBAL_DYNAMIC:
13914 dest = gen_reg_rtx (Pmode);
/* Pick/construct the PIC register needed by the 32-bit sequences.  */
13918 if (flag_pic && !TARGET_PECOFF)
13919 pic = pic_offset_table_rtx;
13922 pic = gen_reg_rtx (Pmode);
13923 emit_insn (gen_set_got (pic));
/* GNU2 (TLSDESC) sequence: descriptor call, then add thread ptr.  */
13927 if (TARGET_GNU2_TLS)
13930 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13932 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13934 tp = get_thread_pointer (Pmode, true);
13935 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13937 if (GET_MODE (x) != Pmode)
13938 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13940 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
/* Classic GNU TLS: call __tls_get_addr, result in %rax/%eax.  */
13944 rtx caddr = ix86_tls_get_addr ();
13948 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13953 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13954 insns = get_insns ();
13957 if (GET_MODE (x) != Pmode)
13958 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13960 RTL_CONST_CALL_P (insns) = 1;
13961 emit_libcall_block (insns, dest, rax, x);
13964 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13968 case TLS_MODEL_LOCAL_DYNAMIC:
13969 base = gen_reg_rtx (Pmode);
13974 pic = pic_offset_table_rtx;
13977 pic = gen_reg_rtx (Pmode);
13978 emit_insn (gen_set_got (pic));
/* GNU2: resolve _TLS_MODULE_BASE_ via the descriptor mechanism.  */
13982 if (TARGET_GNU2_TLS)
13984 rtx tmp = ix86_tls_module_base ();
13987 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13989 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13991 tp = get_thread_pointer (Pmode, true);
13992 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13993 gen_rtx_MINUS (Pmode, tmp, tp));
13997 rtx caddr = ix86_tls_get_addr ();
14001 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14007 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
14008 insns = get_insns ();
14011 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
14012 share the LD_BASE result with other LD model accesses. */
14013 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
14014 UNSPEC_TLS_LD_BASE);
14016 RTL_CONST_CALL_P (insns) = 1;
14017 emit_libcall_block (insns, base, rax, eqv);
14020 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
/* Add the @DTPOFF offset of X to the module base.  */
14023 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
14024 off = gen_rtx_CONST (Pmode, off);
14026 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
14028 if (TARGET_GNU2_TLS)
14030 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
14032 if (GET_MODE (x) != Pmode)
14033 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14035 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14039 case TLS_MODEL_INITIAL_EXEC:
14042 if (TARGET_SUN_TLS && !TARGET_X32)
14044 /* The Sun linker took the AMD64 TLS spec literally
14045 and can only handle %rax as destination of the
14046 initial executable code sequence. */
14048 dest = gen_reg_rtx (DImode);
14049 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
14053 /* Generate DImode references to avoid %fs:(%reg32)
14054 problems and linker IE->LE relaxation bug. */
14057 type = UNSPEC_GOTNTPOFF;
14061 set_pic_reg_ever_live ();
14062 pic = pic_offset_table_rtx;
14063 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
14065 else if (!TARGET_ANY_GNU_TLS)
14067 pic = gen_reg_rtx (Pmode);
14068 emit_insn (gen_set_got (pic));
14069 type = UNSPEC_GOTTPOFF;
14074 type = UNSPEC_INDNTPOFF;
/* Load the TP offset from the GOT slot selected above.  */
14077 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
14078 off = gen_rtx_CONST (tp_mode, off);
14080 off = gen_rtx_PLUS (tp_mode, pic, off);
14081 off = gen_const_mem (tp_mode, off);
14082 set_mem_alias_set (off, ix86_GOT_alias_set ());
14084 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14086 base = get_thread_pointer (tp_mode,
14087 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14088 off = force_reg (tp_mode, off);
14089 return gen_rtx_PLUS (tp_mode, base, off);
/* Non-GNU 32-bit TLS: subtract the offset from the thread pointer.  */
14093 base = get_thread_pointer (Pmode, true);
14094 dest = gen_reg_rtx (Pmode);
14095 emit_insn (ix86_gen_sub3 (dest, base, off));
14099 case TLS_MODEL_LOCAL_EXEC:
14100 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14101 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14102 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14103 off = gen_rtx_CONST (Pmode, off);
14105 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14107 base = get_thread_pointer (Pmode,
14108 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14109 return gen_rtx_PLUS (Pmode, base, off);
14113 base = get_thread_pointer (Pmode, true);
14114 dest = gen_reg_rtx (Pmode);
14115 emit_insn (ix86_gen_sub3 (dest, base, off));
14120 gcc_unreachable ();
14126 /* Create or return the unique __imp_DECL dllimport symbol corresponding
14127 to symbol DECL if BEIMPORT is true. Otherwise create or return the
14128 unique refptr-DECL symbol corresponding to symbol DECL. */
/* Hasher for the decl -> dllimport/refptr symbol cache below: keys on
   the source decl pointer; entries whose decl is no longer GC-marked
   are dropped at collection time.  */
14130 struct dllimport_hasher : ggc_cache_hasher<tree_map *>
14132 static inline hashval_t hash (tree_map *m) { return m->hash; }
14134 equal (tree_map *a, tree_map *b)
14136 return a->base.from == b->base.from;
/* GC callback: keep live entries, delete ones whose key decl died.  */
14140 handle_cache_entry (tree_map *&m)
14142 extern void gt_ggc_mx (tree_map *&);
14143 if (m == HTAB_EMPTY_ENTRY || m == HTAB_DELETED_ENTRY)
14145 else if (ggc_marked_p (m->base.from))
14148 m = static_cast<tree_map *> (HTAB_DELETED_ENTRY);
/* The decl -> __imp_/refptr VAR_DECL cache (GC-rooted).  */
14152 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
/* Create or return the cached artificial VAR_DECL whose RTL is a
   memory reference through the `__imp_DECL' (BEIMPORT) or
   `refptr.DECL' indirection symbol -- see the comment above.
   NOTE(review): some lines (cache-hit return, a conditional around the
   prefix choice) are elided in this excerpt.  */
14155 get_dllimport_decl (tree decl, bool beimport)
14157 struct tree_map *h, in;
14159 const char *prefix;
14160 size_t namelen, prefixlen;
14165 if (!dllimport_map)
14166 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
/* Look up DECL by pointer hash; on a miss, build a fresh entry.  */
14168 in.hash = htab_hash_pointer (decl);
14169 in.base.from = decl;
14170 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
14175 *loc = h = ggc_alloc<tree_map> ();
14177 h->base.from = decl;
14178 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14179 VAR_DECL, NULL, ptr_type_node);
14180 DECL_ARTIFICIAL (to) = 1;
14181 DECL_IGNORED_P (to) = 1;
14182 DECL_EXTERNAL (to) = 1;
14183 TREE_READONLY (to) = 1;
/* Build the "*__imp_..." or "*refptr..." assembler name; the leading
   '*' suppresses further user_label_prefix mangling.  */
14185 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14186 name = targetm.strip_name_encoding (name);
14188 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14189 ? "*__imp_" : "*__imp__";
14191 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
14192 namelen = strlen (name);
14193 prefixlen = strlen (prefix);
14194 imp_name = (char *) alloca (namelen + prefixlen + 1);
14195 memcpy (imp_name, prefix, prefixlen);
14196 memcpy (imp_name + prefixlen, name, namelen + 1);
14198 name = ggc_alloc_string (imp_name, namelen + prefixlen);
14199 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14200 SET_SYMBOL_REF_DECL (rtl, to);
14201 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14204 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14205 #ifdef SUB_TARGET_RECORD_STUB
14206 SUB_TARGET_RECORD_STUB (name);
/* The decl's RTL is a load through the indirection slot, tagged with
   the GOT alias set.  */
14210 rtl = gen_const_mem (Pmode, rtl);
14211 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14213 SET_DECL_RTL (to, rtl);
14214 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14219 /* Expand SYMBOL into its corresponding far-addresse symbol.
14220 WANT_REG is true if we require the result be a register. */
/* Replace SYMBOL with the refptr indirection decl's RTL (see comment
   above); force into a register when WANT_REG.  NOTE(review): local
   declarations and the return are elided in this excerpt.  */
14223 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14228 gcc_assert (SYMBOL_REF_DECL (symbol));
14229 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14231 x = DECL_RTL (imp_decl);
14233 x = force_reg (Pmode, x);
14237 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
14238 true if we require the result be a register. */
/* Replace SYMBOL with its __imp_ dllimport decl's RTL (see comment
   above); force into a register when WANT_REG.  NOTE(review): local
   declarations and the return are elided in this excerpt.  */
14241 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14246 gcc_assert (SYMBOL_REF_DECL (symbol));
14247 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14249 x = DECL_RTL (imp_decl);
14251 x = force_reg (Pmode, x);
14255 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
14256 is true if we require the result be a register. */
/* PE-COFF dispatcher: rewrite ADDR through the dllimport (__imp_) or
   extern-decl (refptr) indirection, handling both bare SYMBOL_REFs
   and (const (plus sym offset)) forms; INREG requests a register
   result.  Returns NULL-ish for non-PECOFF targets (early-exit line
   elided).  */
14259 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14261 if (!TARGET_PECOFF)
14264 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14266 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14267 return legitimize_dllimport_symbol (addr, inreg);
14268 if (GET_CODE (addr) == CONST
14269 && GET_CODE (XEXP (addr, 0)) == PLUS
14270 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14271 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
/* Indirect only the symbol; re-attach the constant offset outside.  */
14273 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14274 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
/* Extern-decl indirection is only needed for the medium/large code
   models; skip for the rest.  */
14278 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14280 if (GET_CODE (addr) == SYMBOL_REF
14281 && !is_imported_p (addr)
14282 && SYMBOL_REF_EXTERNAL_P (addr)
14283 && SYMBOL_REF_DECL (addr))
14284 return legitimize_pe_coff_extern_decl (addr, inreg);
14286 if (GET_CODE (addr) == CONST
14287 && GET_CODE (XEXP (addr, 0)) == PLUS
14288 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14289 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14290 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14291 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14293 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14294 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14299 /* Try machine-dependent ways of modifying an illegitimate address
14300 to be legitimate. If we find one, return the new, valid address.
14301 This macro is used in only one place: `memory_address' in explow.c.
14303 OLDX is the address as it was before break_out_memory_refs was called.
14304 In some cases it is useful to look at this to decide what needs to be done.
14306 It is always safe for this macro to do nothing. It exists to recognize
14307 opportunities to optimize the output.
14309 For the 80386, we handle X+REG by loading X into a register R and
14310 using R+REG. R will go in a general reg and indexing will be used.
14311 However, if REG is a broken-out memory address or multiplication,
14312 nothing needs to be done because REG can certainly go in a general reg.
14314 When -fpic is used, special handling is needed for symbolic references.
14315 See comments by legitimize_pic_address in i386.c for details. */
/* Implementation of the TARGET_LEGITIMIZE_ADDRESS hook: try to rewrite
   address X (used in mode MODE) into a form the x86 addressing modes can
   encode (base + index*scale + disp).  Returns the (possibly rewritten)
   address; RTL for intermediate computations may be emitted as a side
   effect.  NOTE(review): many interior lines of this function are elided
   in this extract, so the control flow shown here is incomplete.  */
14318 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14320 bool changed = false;
/* TLS symbols must go through the TLS-specific legitimizer; LOG holds the
   symbol's TLS model (0 means not TLS).  */
14323 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14325 return legitimize_tls_address (x, (enum tls_model) log, false);
/* (const (plus (symbol_ref tls) offset)) — legitimize the TLS symbol and
   re-add the constant offset on top.  */
14326 if (GET_CODE (x) == CONST
14327 && GET_CODE (XEXP (x, 0)) == PLUS
14328 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14329 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14331 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14332 (enum tls_model) log, false);
14333 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* PE/COFF dllimport symbols need an indirection through the import table.  */
14336 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14338 rtx tmp = legitimize_pe_coff_symbol (x, true);
/* Symbolic constants under -fpic go through the PIC legitimizer.  */
14343 if (flag_pic && SYMBOLIC_CONST (x))
14344 return legitimize_pic_address (x, 0);
/* Darwin -mdynamic-no-pic: indirect data references instead of PIC.  */
14347 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14348 return machopic_indirect_data_reference (x, 0);
14351 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
14352 if (GET_CODE (x) == ASHIFT
14353 && CONST_INT_P (XEXP (x, 1))
14354 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14357 log = INTVAL (XEXP (x, 1));
14358 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14359 GEN_INT (1 << log));
14362 if (GET_CODE (x) == PLUS)
14364 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14366 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14367 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14368 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14371 log = INTVAL (XEXP (XEXP (x, 0), 1));
14372 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14373 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14374 GEN_INT (1 << log));
14377 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14378 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14379 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14382 log = INTVAL (XEXP (XEXP (x, 1), 1));
14383 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14384 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14385 GEN_INT (1 << log));
14388 /* Put multiply first if it isn't already. */
14389 if (GET_CODE (XEXP (x, 1)) == MULT)
14391 std::swap (XEXP (x, 0), XEXP (x, 1));
14395 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14396 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14397 created by virtual register instantiation, register elimination, and
14398 similar optimizations. */
14399 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14402 x = gen_rtx_PLUS (Pmode,
14403 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14404 XEXP (XEXP (x, 1), 0)),
14405 XEXP (XEXP (x, 1), 1));
14409 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14410 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14411 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14412 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14413 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14414 && CONSTANT_P (XEXP (x, 1)))
/* CONSTANT holds the CONST_INT term, OTHER the remaining addend; exactly
   one of the two candidate positions supplies each.  */
14417 rtx other = NULL_RTX;
14419 if (CONST_INT_P (XEXP (x, 1)))
14421 constant = XEXP (x, 1);
14422 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14424 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14426 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14427 other = XEXP (x, 1);
14435 x = gen_rtx_PLUS (Pmode,
14436 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14437 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14438 plus_constant (Pmode, other,
14439 INTVAL (constant)));
/* If the canonicalizations above already produced a legitimate address,
   stop early (the elided lines presumably return here — TODO confirm).  */
14443 if (changed && ix86_legitimate_address_p (mode, x, false))
/* Force MULT sub-expressions into registers so the remaining address fits
   base + index form.  */
14446 if (GET_CODE (XEXP (x, 0)) == MULT)
14449 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14452 if (GET_CODE (XEXP (x, 1)) == MULT)
14455 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14459 && REG_P (XEXP (x, 1))
14460 && REG_P (XEXP (x, 0)))
14463 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14466 x = legitimize_pic_address (x, 0);
14469 if (changed && ix86_legitimate_address_p (mode, x, false))
/* Last resort: force one side of the PLUS through force_operand into a
   fresh Pmode register (zero-extending via convert_to_mode if needed).  */
14472 if (REG_P (XEXP (x, 0)))
14474 rtx temp = gen_reg_rtx (Pmode);
14475 rtx val = force_operand (XEXP (x, 1), temp);
14478 val = convert_to_mode (Pmode, val, 1);
14479 emit_move_insn (temp, val);
14482 XEXP (x, 1) = temp;
14486 else if (REG_P (XEXP (x, 1)))
14488 rtx temp = gen_reg_rtx (Pmode);
14489 rtx val = force_operand (XEXP (x, 0), temp);
14492 val = convert_to_mode (Pmode, val, 1);
14493 emit_move_insn (temp, val);
14496 XEXP (x, 0) = temp;
14504 /* Print an integer constant expression in assembler syntax. Addition
14505 and subtraction are the only arithmetic that may appear in these
14506 expressions. FILE is the stdio stream to write to, X is the rtx, and
14507 CODE is the operand print code from the output string. */
/* Print the PIC-aware constant expression X to FILE, appending the
   appropriate relocation suffix (@PLT, @GOT, @GOTOFF, @tpoff, ...) for
   SYMBOL_REFs wrapped in UNSPECs.  CODE is the operand print code from
   the output template ('P' requests a @PLT suffix where applicable).
   Recurses on PLUS/MINUS/CONST sub-expressions.  NOTE(review): the
   switch's case labels are largely elided in this extract.  */
14510 output_pic_addr_const (FILE *file, rtx x, int code)
14514 switch (GET_CODE (x))
/* PC-relative references only make sense when compiling PIC.  */
14517 gcc_assert (flag_pic);
14522 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14523 output_addr_const (file, x);
/* SYMBOL_REF: possibly redirect through a Mach-O stub, then emit the
   assembler name, then @PLT for non-local calls on ELF targets.  */
14526 const char *name = XSTR (x, 0);
14528 /* Mark the decl as referenced so that cgraph will
14529 output the function. */
14530 if (SYMBOL_REF_DECL (x))
14531 mark_decl_referenced (SYMBOL_REF_DECL (x));
14534 if (MACHOPIC_INDIRECT
14535 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14536 name = machopic_indirection_name (x, /*stub_p=*/true);
14538 assemble_name (file, name);
14540 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14541 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14542 fputs ("@PLT", file);
/* CODE_LABEL / LABEL_REF: emit the internal "L<n>" label name.  */
14549 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14550 assemble_name (asm_out_file, buf);
/* CONST_INT: plain decimal.  */
14554 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14558 /* This used to output parentheses around the expression,
14559 but that does not work on the 386 (either ATT or BSD assembler). */
14560 output_pic_addr_const (file, XEXP (x, 0), code);
/* CONST_DOUBLE in VOIDmode is a 64-bit integer split into high/low.  */
14564 if (GET_MODE (x) == VOIDmode)
14566 /* We can use %d if the number is <32 bits and positive. */
14567 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14568 fprintf (file, "0x%lx%08lx",
14569 (unsigned long) CONST_DOUBLE_HIGH (x),
14570 (unsigned long) CONST_DOUBLE_LOW (x));
14572 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14575 /* We can't handle floating point constants;
14576 TARGET_PRINT_OPERAND must handle them. */
14577 output_operand_lossage ("floating constant misused");
/* PLUS: print each side; MINUS below is bracketed for some assemblers.  */
14581 /* Some assemblers need integer constants to appear first. */
14582 if (CONST_INT_P (XEXP (x, 0)))
14584 output_pic_addr_const (file, XEXP (x, 0), code);
14586 output_pic_addr_const (file, XEXP (x, 1), code);
14590 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14591 output_pic_addr_const (file, XEXP (x, 1), code);
14593 output_pic_addr_const (file, XEXP (x, 0), code);
14599 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14600 output_pic_addr_const (file, XEXP (x, 0), code);
14602 output_pic_addr_const (file, XEXP (x, 1), code);
14604 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: the wrapped operand plus a relocation suffix chosen below.  */
14608 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14610 bool f = i386_asm_output_addr_const_extra (file, x);
14615 gcc_assert (XVECLEN (x, 0) == 1);
14616 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14617 switch (XINT (x, 1))
14620 fputs ("@GOT", file);
14622 case UNSPEC_GOTOFF:
14623 fputs ("@GOTOFF", file);
14625 case UNSPEC_PLTOFF:
14626 fputs ("@PLTOFF", file);
14629 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14630 "(%rip)" : "[rip]", file);
14632 case UNSPEC_GOTPCREL:
14633 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14634 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14636 case UNSPEC_GOTTPOFF:
14637 /* FIXME: This might be @TPOFF in Sun ld too. */
14638 fputs ("@gottpoff", file);
14641 fputs ("@tpoff", file);
14643 case UNSPEC_NTPOFF:
/* TARGET_64BIT uses @tpoff, 32-bit uses @ntpoff (branch elided here).  */
14645 fputs ("@tpoff", file);
14647 fputs ("@ntpoff", file);
14649 case UNSPEC_DTPOFF:
14650 fputs ("@dtpoff", file);
14652 case UNSPEC_GOTNTPOFF:
14654 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14655 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14657 fputs ("@gotntpoff", file);
14659 case UNSPEC_INDNTPOFF:
14660 fputs ("@indntpoff", file);
14663 case UNSPEC_MACHOPIC_OFFSET:
14665 machopic_output_function_base_name (file);
14669 output_operand_lossage ("invalid UNSPEC as operand");
14675 output_operand_lossage ("invalid expression as operand");
14679 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14680 We need to emit DTP-relative relocations. */
/* TARGET_ASM_OUTPUT_DWARF_DTPREL hook: emit a DTP-relative (dynamic
   thread pointer relative) relocation for X into the DWARF section.
   SIZE selects the directive width; the size-dispatch lines are elided
   here, but a trailing ", 0" pads the 8-byte case and unsupported sizes
   hit gcc_unreachable.  */
14682 static void ATTRIBUTE_UNUSED
14683 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14685 fputs (ASM_LONG, file);
14686 output_addr_const (file, x);
14687 fputs ("@dtpoff", file);
/* 8-byte case on 32-bit: pad the upper word with zero.  */
14693 fputs (", 0", file);
14696 gcc_unreachable ();
14700 /* Return true if X is a representation of the PIC register. This copes
14701 with calls from ix86_find_base_term, where the register might have
14702 been replaced by a cselib value. */
/* Return true if X represents the PIC register.  Handles three forms:
   a cselib VALUE equivalent to pic_offset_table_rtx (for calls from
   ix86_find_base_term during alias analysis), a register matching the
   current pic_offset_table_rtx (by regno, or by ORIGINAL_REGNO when a
   hard reg replaced the pseudo), or — with no pic_offset_table_rtx —
   the fixed PIC_OFFSET_TABLE_REGNUM.  */
14705 ix86_pic_register_p (rtx x)
14707 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14708 return (pic_offset_table_rtx
14709 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14710 else if (!REG_P (x))
14712 else if (pic_offset_table_rtx)
14714 if (REGNO (x) == REGNO (pic_offset_table_rtx))
/* After reload a hard register may stand in for the PIC pseudo; match it
   through ORIGINAL_REGNO.  */
14716 if (HARD_REGISTER_P (x)
14717 && !HARD_REGISTER_P (pic_offset_table_rtx)
14718 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14723 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14726 /* Helper function for ix86_delegitimize_address.
14727 Attempt to delegitimize TLS local-exec accesses. */
/* Helper for ix86_delegitimize_address: undo a TLS local-exec access of
   the form seg:[base + index*scale + const(UNSPEC_NTPOFF(sym) [+ off])],
   rebuilding a plain SYMBOL_REF-based address for debug output.  Returns
   ORIG_X unchanged (via the elided early-return paths) when the pattern
   does not match.  */
14730 ix86_delegitimize_tls_address (rtx orig_x)
14732 rtx x = orig_x, unspec;
14733 struct ix86_address addr;
/* Direct %fs/%gs segment references are the only form we can undo.  */
14735 if (!TARGET_TLS_DIRECT_SEG_REFS)
14739 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14741 if (ix86_decompose_address (x, &addr) == 0
14742 || addr.seg != DEFAULT_TLS_SEG_REG
14743 || addr.disp == NULL_RTX
14744 || GET_CODE (addr.disp) != CONST)
/* Peel an optional constant offset off the displacement to reach the
   UNSPEC_NTPOFF wrapper.  */
14746 unspec = XEXP (addr.disp, 0);
14747 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14748 unspec = XEXP (unspec, 0);
14749 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14751 x = XVECEXP (unspec, 0, 0);
14752 gcc_assert (GET_CODE (x) == SYMBOL_REF);
/* Re-attach the peeled offset, then the index*scale and base parts.  */
14753 if (unspec != XEXP (addr.disp, 0))
14754 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14757 rtx idx = addr.index;
14758 if (addr.scale != 1)
14759 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14760 x = gen_rtx_PLUS (Pmode, idx, x);
14763 x = gen_rtx_PLUS (Pmode, addr.base, x);
/* If the original was a MEM, keep its attributes on the new address.  */
14764 if (MEM_P (orig_x))
14765 x = replace_equiv_address_nv (orig_x, x);
14769 /* In the name of slightly smaller debug output, and to cater to
14770 general assembler lossage, recognize PIC+GOTOFF and turn it back
14771 into a direct symbol reference.
14773 On Darwin, this is necessary to avoid a crash, because Darwin
14774 has a different PIC label for each routine but the DWARF debugging
14775 information is not associated with any particular routine, so it's
14776 necessary to remove references to the PIC label from RTL stored by
14777 the DWARF output code. */
/* TARGET_DELEGITIMIZE_ADDRESS hook: turn a PIC/GOT-mangled address back
   into a direct symbol reference for debug info and dump output.
   Recognizes UNSPEC_PCREL / UNSPEC_GOTPCREL wrappers (64-bit), and
   GOT/GOTOFF/PLTOFF forms relative to the PIC register (32-bit and the
   medium/large 64-bit code models).  Falls back to
   ix86_delegitimize_tls_address when no GOT pattern matches.
   NOTE(review): several early-return and 64-bit-only lines are elided
   in this extract.  */
14780 ix86_delegitimize_address (rtx x)
14782 rtx orig_x = delegitimize_mem_from_attrs (x);
14783 /* addend is NULL or some rtx if x is something+GOTOFF where
14784 something doesn't include the PIC register. */
14785 rtx addend = NULL_RTX;
14786 /* reg_addend is NULL or a multiple of some register. */
14787 rtx reg_addend = NULL_RTX;
14788 /* const_addend is NULL or a const_int. */
14789 rtx const_addend = NULL_RTX;
14790 /* This is the result, or NULL. */
14791 rtx result = NULL_RTX;
/* 64-bit: (const (plus (unspec [sym] PCREL) offset)) -> offset + sym.  */
14800 if (GET_CODE (x) == CONST
14801 && GET_CODE (XEXP (x, 0)) == PLUS
14802 && GET_MODE (XEXP (x, 0)) == Pmode
14803 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14804 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14805 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14807 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14808 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14809 if (MEM_P (orig_x))
14810 x = replace_equiv_address_nv (orig_x, x);
/* Bare UNSPEC_GOTPCREL / UNSPEC_PCREL: unwrap to the symbol, fixing up
   the mode with a subreg when the MEM was narrower than Pmode.  */
14814 if (GET_CODE (x) == CONST
14815 && GET_CODE (XEXP (x, 0)) == UNSPEC
14816 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14817 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14818 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14820 x = XVECEXP (XEXP (x, 0), 0, 0);
14821 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14823 x = simplify_gen_subreg (GET_MODE (orig_x), x,
/* Only -mcmodel=medium/large PIC reach the shared 32-bit path below.  */
14831 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14832 return ix86_delegitimize_tls_address (orig_x);
14834 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14835 and -mcmodel=medium -fpic. */
14838 if (GET_CODE (x) != PLUS
14839 || GET_CODE (XEXP (x, 1)) != CONST)
14840 return ix86_delegitimize_tls_address (orig_x);
/* Classify the non-CONST side: the PIC register itself, PIC reg plus a
   scaled index, or an arbitrary addend not involving the PIC register.  */
14842 if (ix86_pic_register_p (XEXP (x, 0)))
14843 /* %ebx + GOT/GOTOFF */
14845 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14847 /* %ebx + %reg * scale + GOT/GOTOFF */
14848 reg_addend = XEXP (x, 0);
14849 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14850 reg_addend = XEXP (reg_addend, 1);
14851 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14852 reg_addend = XEXP (reg_addend, 0);
14855 reg_addend = NULL_RTX;
14856 addend = XEXP (x, 0);
14860 addend = XEXP (x, 0);
/* Strip the CONST wrapper and an optional constant offset.  */
14862 x = XEXP (XEXP (x, 1), 0);
14863 if (GET_CODE (x) == PLUS
14864 && CONST_INT_P (XEXP (x, 1)))
14866 const_addend = XEXP (x, 1);
/* Accept only the UNSPEC forms that are reversible in this context.  */
14870 if (GET_CODE (x) == UNSPEC
14871 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14872 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14873 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14874 && !MEM_P (orig_x) && !addend)))
14875 result = XVECEXP (x, 0, 0);
14877 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14878 && !MEM_P (orig_x))
14879 result = XVECEXP (x, 0, 0);
14882 return ix86_delegitimize_tls_address (orig_x);
/* Reassemble: symbol + const offset + register addend.  */
14885 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14887 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14890 /* If the rest of original X doesn't involve the PIC register, add
14891 addend and subtract pic_offset_table_rtx. This can happen e.g.
14893 leal (%ebx, %ecx, 4), %ecx
14895 movl foo@GOTOFF(%ecx), %edx
14896 in which case we return (%ecx - %ebx) + foo
14897 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
14898 and reload has completed. */
14899 if (pic_offset_table_rtx
14900 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
14901 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14902 pic_offset_table_rtx),
14904 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
14906 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
14907 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
14908 result = gen_rtx_PLUS (Pmode, tmp, result);
/* Match the original MEM's mode if it was narrower than Pmode.  */
14913 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14915 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14916 if (result == NULL_RTX)
14922 /* If X is a machine specific address (i.e. a symbol or label being
14923 referenced as a displacement from the GOT implemented using an
14924 UNSPEC), then return the base term. Otherwise return X. */
/* TARGET_FIND_BASE_TERM hook: for alias analysis, peel 64-bit
   UNSPEC_GOTPCREL / UNSPEC_PCREL wrappers (optionally offset by a
   constant) to reach the underlying symbol; otherwise delegate to
   ix86_delegitimize_address.  */
14927 ix86_find_base_term (rtx x)
14933 if (GET_CODE (x) != CONST)
14935 term = XEXP (x, 0);
/* Skip an outer "+ constant" (CONST_INT or integral CONST_DOUBLE).  */
14936 if (GET_CODE (term) == PLUS
14937 && (CONST_INT_P (XEXP (term, 1))
14938 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14939 term = XEXP (term, 0);
14940 if (GET_CODE (term) != UNSPEC
14941 || (XINT (term, 1) != UNSPEC_GOTPCREL
14942 && XINT (term, 1) != UNSPEC_PCREL))
14945 return XVECEXP (term, 0, 0);
14948 return ix86_delegitimize_address (x);
/* Write the condition-code suffix for comparison CODE in mode MODE to
   FILE ("e", "ne", "a", "b", "s", "p", ...).  REVERSE inverts the
   condition; FP selects the fcmov-style spellings ("nbe", "nb", "u",
   "nu") where plain integer suffixes would be ambiguous on some
   assemblers.  NOTE(review): most case labels of the dispatch switch
   are elided in this extract.  */
14952 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
14953 bool fp, FILE *file)
14955 const char *suffix;
/* FP compares set flags via fcomi/ucomi; map the FP rtx code onto the
   integer flag condition first.  */
14957 if (mode == CCFPmode || mode == CCFPUmode)
14959 code = ix86_fp_compare_code_to_integer (code);
14963 code = reverse_condition (code);
14965 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
15018 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
15019 Those same assemblers have the same but opposite lossage on cmov. */
15020 if (mode == CCmode)
15021 suffix = fp ? "nbe" : "a"
15023 gcc_unreachable ();
15039 gcc_unreachable ();
/* LTU: below / carry set.  */
15043 if (mode == CCmode)
15045 else if (mode == CCCmode)
15046 suffix = fp ? "b" : "c";
15048 gcc_unreachable ();
15064 gcc_unreachable ();
/* GEU: not-below / carry clear.  */
15068 if (mode == CCmode)
15070 else if (mode == CCCmode)
15071 suffix = fp ? "nb" : "nc";
15073 gcc_unreachable ();
15076 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
15080 if (mode == CCmode)
15083 gcc_unreachable ();
/* UNORDERED / ORDERED map to the parity flag.  */
15086 suffix = fp ? "u" : "p";
15089 suffix = fp ? "nu" : "np";
15092 gcc_unreachable ();
15094 fputs (suffix, file);
15097 /* Print the name of register X to FILE based on its machine mode and number.
15098 If CODE is 'w', pretend the mode is HImode.
15099 If CODE is 'b', pretend the mode is QImode.
15100 If CODE is 'k', pretend the mode is SImode.
15101 If CODE is 'q', pretend the mode is DImode.
15102 If CODE is 'x', pretend the mode is V4SFmode.
15103 If CODE is 't', pretend the mode is V8SFmode.
15104 If CODE is 'g', pretend the mode is V16SFmode.
15105 If CODE is 'h', pretend the reg is the 'high' byte register.
15106 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
15107 If CODE is 'd', duplicate the operand for AVX instruction.
/* Emit the assembler name of register X to FILE, sized according to
   CODE (see the comment block above for the code letters).  Handles the
   AT&T '%' prefix, pc_rtx as "rip", REX registers ("r8"..), x87 stack
   registers, and the 'd' duplication used by AVX.  NOTE(review): the
   size-dispatch switch bodies are largely elided in this extract.  */
15111 print_reg (rtx x, int code, FILE *file)
15114 unsigned int regno;
15115 bool duplicated = code == 'd' && TARGET_AVX;
15117 if (ASSEMBLER_DIALECT == ASM_ATT)
/* pc_rtx prints as "rip"; only valid in 64-bit mode.  */
15122 gcc_assert (TARGET_64BIT);
15123 fputs ("rip", file);
15127 regno = true_regnum (x);
/* These registers should have been eliminated or are not printable.  */
15128 gcc_assert (regno != ARG_POINTER_REGNUM
15129 && regno != FRAME_POINTER_REGNUM
15130 && regno != FLAGS_REG
15131 && regno != FPSR_REG
15132 && regno != FPCR_REG);
/* Translate the CODE letter into an operand byte size (elided branches
   presumably assign 2, 1, 4, 8, ... — TODO confirm against full source).  */
15134 if (code == 'w' || MMX_REG_P (x))
15136 else if (code == 'b')
15138 else if (code == 'k')
15140 else if (code == 'q')
15142 else if (code == 'y')
15144 else if (code == 'h')
15146 else if (code == 'x')
15148 else if (code == 't')
15150 else if (code == 'g')
15153 code = GET_MODE_SIZE (GET_MODE (x));
15155 /* Irritatingly, AMD extended registers use different naming convention
15156 from the normal registers: "r%d[bwd]" */
15157 if (REX_INT_REGNO_P (regno))
15159 gcc_assert (TARGET_64BIT);
15161 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
/* REX registers have no AH/BH/... style high-byte halves.  */
15165 error ("extended registers have no high halves");
15180 error ("unsupported operand size for extended register");
15190 if (STACK_TOP_P (x))
/* General registers get an 'r'/'e' prefix by size; FP/mask/bound regs
   use their table names directly.  */
15199 if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x) && ! ANY_BND_REG_P (x))
15200 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
15205 reg = hi_reg_name[regno];
15208 if (regno >= ARRAY_SIZE (qi_reg_name))
15210 reg = qi_reg_name[regno];
15213 if (regno >= ARRAY_SIZE (qi_high_reg_name))
15215 reg = qi_high_reg_name[regno];
15220 gcc_assert (!duplicated);
/* Skip the leading size letter of the table name for word-size use.  */
15222 fputs (hi_reg_name[regno] + 1, file);
15228 gcc_assert (!duplicated);
15230 fputs (hi_reg_name[REGNO (x)] + 1, file);
15235 gcc_unreachable ();
/* AVX 'd': print the register twice, comma-separated.  */
15241 if (ASSEMBLER_DIALECT == ASM_ATT)
15242 fprintf (file, ", %%%s", reg);
15244 fprintf (file, ", %s", reg);
15248 /* Meaning of CODE:
15249 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15250 C -- print opcode suffix for set/cmov insn.
15251 c -- like C, but print reversed condition
15252 F,f -- likewise, but for floating-point.
15253 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15255 R -- print embeded rounding and sae.
15256 r -- print only sae.
15257 z -- print the opcode suffix for the size of the current operand.
15258 Z -- likewise, with special suffixes for x87 instructions.
15259 * -- print a star (in certain assembler syntax)
15260 A -- print an absolute memory reference.
15261 E -- print address with DImode register names if TARGET_64BIT.
15262 w -- print the operand as if it's a "word" (HImode) even if it isn't.
15263 s -- print a shift double count, followed by the assemblers argument
15265 b -- print the QImode name of the register for the indicated operand.
15266 %b0 would print %al if operands[0] is reg 0.
15267 w -- likewise, print the HImode name of the register.
15268 k -- likewise, print the SImode name of the register.
15269 q -- likewise, print the DImode name of the register.
15270 x -- likewise, print the V4SFmode name of the register.
15271 t -- likewise, print the V8SFmode name of the register.
15272 g -- likewise, print the V16SFmode name of the register.
15273 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15274 y -- print "st(0)" instead of "st" as a register.
15275 d -- print duplicated register operand for AVX instruction.
15276 D -- print condition for SSE cmp instruction.
15277 P -- if PIC, print an @PLT suffix.
15278 p -- print raw symbol name.
15279 X -- don't print any sort of PIC '@' suffix for a symbol.
15280 & -- print some in-use local-dynamic symbol name.
15281 H -- print a memory address offset by 8; used for sse high-parts
15282 Y -- print condition for XOP pcom* instruction.
15283 + -- print a branch hint as 'cs' or 'ds' prefix
15284 ; -- print a semicolon (after prefixes due to bug in older gas).
15285 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15286 @ -- print a segment register of thread base pointer load
15287 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15288 ! -- print MPX prefix for jxx/call/ret instructions if required.
/* TARGET_PRINT_OPERAND hook: print operand X to FILE, interpreted per
   the print code CODE documented in the big comment above (size
   suffixes, condition codes, branch hints, rounding modifiers, address
   prefixes, ...).  Dispatches first on CODE, then on the rtx class of X
   (register / memory / const_double / constant).  NOTE(review): nearly
   all of the outer switch's case labels are elided in this extract; the
   groupings below are inferred from the surviving bodies.  */
15292 ix86_print_operand (FILE *file, rtx x, int code)
/* 'A': absolute memory reference — '*' in AT&T, bracketed in Intel.  */
15299 switch (ASSEMBLER_DIALECT)
15306 /* Intel syntax. For absolute addresses, registers should not
15307 be surrounded by braces. */
15311 ix86_print_operand (file, x, 0);
15318 gcc_unreachable ();
/* 'E': print address, forcing DImode register names in 64-bit mode.  */
15321 ix86_print_operand (file, x, 0);
15325 /* Wrap address in an UNSPEC to declare special handling. */
15327 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15329 output_address (x);
/* 'L'/'W'/'B'/'Q'/'S'/'T': explicit size suffixes, AT&T only.  */
15333 if (ASSEMBLER_DIALECT == ASM_ATT)
15338 if (ASSEMBLER_DIALECT == ASM_ATT)
15343 if (ASSEMBLER_DIALECT == ASM_ATT)
15348 if (ASSEMBLER_DIALECT == ASM_ATT)
15353 if (ASSEMBLER_DIALECT == ASM_ATT)
15358 if (ASSEMBLER_DIALECT == ASM_ATT)
/* 'O': Sun assembler cmov suffix ("w."/"l."/"q.").  */
15363 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15364 if (ASSEMBLER_DIALECT != ASM_ATT)
15367 switch (GET_MODE_SIZE (GET_MODE (x)))
15382 output_operand_lossage
15383 ("invalid operand size for operand code 'O'");
/* 'z': integer opcode suffix derived from the operand's mode size.  */
15392 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15394 /* Opcodes don't get size suffixes if using Intel opcodes. */
15395 if (ASSEMBLER_DIALECT == ASM_INTEL)
15398 switch (GET_MODE_SIZE (GET_MODE (x)))
15417 output_operand_lossage
15418 ("invalid operand size for operand code 'z'");
15423 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15425 (0, "non-integer operand used with operand code 'z'");
/* 'Z': x87 fild/fist-style suffixes ("s"/"l"/"ll"/"q", "t").  */
15429 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
15430 if (ASSEMBLER_DIALECT == ASM_INTEL)
15433 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15435 switch (GET_MODE_SIZE (GET_MODE (x)))
15438 #ifdef HAVE_AS_IX86_FILDS
15448 #ifdef HAVE_AS_IX86_FILDQ
15451 fputs ("ll", file);
15459 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15461 /* 387 opcodes don't get size suffixes
15462 if the operands are registers. */
15463 if (STACK_REG_P (x))
15466 switch (GET_MODE_SIZE (GET_MODE (x)))
15487 output_operand_lossage
15488 ("invalid operand type used with operand code 'Z'");
15492 output_operand_lossage
15493 ("invalid operand size for operand code 'Z'");
/* 's': shift-double count followed by the assembler's argument separator.  */
15512 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15514 ix86_print_operand (file, x, 0);
15515 fputs (", ", file);
/* 'Y': XOP pcom* condition names.  */
15520 switch (GET_CODE (x))
15523 fputs ("neq", file);
15526 fputs ("eq", file);
15530 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15534 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15538 fputs ("le", file);
15542 fputs ("lt", file);
15545 fputs ("unord", file);
15548 fputs ("ord", file);
15551 fputs ("ueq", file);
15554 fputs ("nlt", file);
15557 fputs ("nle", file);
15560 fputs ("ule", file);
15563 fputs ("ult", file);
15566 fputs ("une", file);
15569 output_operand_lossage ("operand is not a condition code, "
15570 "invalid operand code 'Y'");
/* 'D': SSE cmp* condition names (a different vocabulary from fcmov).  */
15576 /* Little bit of braindamage here. The SSE compare instructions
15577 does use completely different names for the comparisons that the
15578 fp conditional moves. */
15579 switch (GET_CODE (x))
15584 fputs ("eq_us", file);
15588 fputs ("eq", file);
15593 fputs ("nge", file);
15597 fputs ("lt", file);
15602 fputs ("ngt", file);
15606 fputs ("le", file);
15609 fputs ("unord", file);
15614 fputs ("neq_oq", file);
15618 fputs ("neq", file);
15623 fputs ("ge", file);
15627 fputs ("nlt", file);
15632 fputs ("gt", file);
15636 fputs ("nle", file);
15639 fputs ("ord", file);
15642 output_operand_lossage ("operand is not a condition code, "
15643 "invalid operand code 'D'");
/* 'C'/'c'/'F'/'f': set/cmov condition suffix via put_condition_code;
   lowercase codes reverse the condition, 'F'/'f' use FP spellings.  */
15650 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15651 if (ASSEMBLER_DIALECT == ASM_ATT)
15657 if (!COMPARISON_P (x))
15659 output_operand_lossage ("operand is not a condition code, "
15660 "invalid operand code '%c'", code);
15663 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15664 code == 'c' || code == 'f',
15665 code == 'F' || code == 'f',
/* 'H': the memory operand offset by 8 bytes (SSE high parts).  */
15670 if (!offsettable_memref_p (x))
15672 output_operand_lossage ("operand is not an offsettable memory "
15673 "reference, invalid operand code 'H'");
15676 /* It doesn't actually matter what mode we use here, as we're
15677 only going to use this for printing. */
15678 x = adjust_address_nv (x, DImode, 8);
15679 /* Output 'qword ptr' for intel assembler dialect. */
15680 if (ASSEMBLER_DIALECT == ASM_INTEL)
/* 'K': HLE xacquire/xrelease prefixes, raw bytes without as support.  */
15685 gcc_assert (CONST_INT_P (x));
15687 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15688 #ifdef HAVE_AS_IX86_HLE
15689 fputs ("xacquire ", file);
15691 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15693 else if (INTVAL (x) & IX86_HLE_RELEASE)
15694 #ifdef HAVE_AS_IX86_HLE
15695 fputs ("xrelease ", file);
15697 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15699 /* We do not want to print value of the operand. */
/* AVX-512 zero-masking marker.  */
15703 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15704 fputs ("{z}", file);
/* 'r': suppress-all-exceptions marker only.  */
15708 gcc_assert (CONST_INT_P (x));
15709 gcc_assert (INTVAL (x) == ROUND_SAE);
15711 if (ASSEMBLER_DIALECT == ASM_INTEL)
15712 fputs (", ", file);
15714 fputs ("{sae}", file);
15716 if (ASSEMBLER_DIALECT == ASM_ATT)
15717 fputs (", ", file);
/* 'R': embedded rounding mode plus SAE.  */
15722 gcc_assert (CONST_INT_P (x));
15724 if (ASSEMBLER_DIALECT == ASM_INTEL)
15725 fputs (", ", file);
15727 switch (INTVAL (x))
15729 case ROUND_NEAREST_INT | ROUND_SAE:
15730 fputs ("{rn-sae}", file);
15732 case ROUND_NEG_INF | ROUND_SAE:
15733 fputs ("{rd-sae}", file);
15735 case ROUND_POS_INF | ROUND_SAE:
15736 fputs ("{ru-sae}", file);
15738 case ROUND_ZERO | ROUND_SAE:
15739 fputs ("{rz-sae}", file);
15742 gcc_unreachable ();
15745 if (ASSEMBLER_DIALECT == ASM_ATT)
15746 fputs (", ", file);
/* '*': star for AT&T indirect jumps/calls.  */
15751 if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&': some in-use local-dynamic TLS symbol name.  */
15757 const char *name = get_some_local_dynamic_name ();
15759 output_operand_lossage ("'%%&' used without any "
15760 "local dynamic TLS references");
15762 assemble_name (file, name);
/* '+': branch hint prefixes, only when predictions diverge from the
   CPU's static heuristics.  */
15771 || optimize_function_for_size_p (cfun)
15772 || !TARGET_BRANCH_PREDICTION_HINTS)
15775 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15778 int pred_val = XINT (x, 0);
15780 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15781 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15783 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15785 = final_forward_branch_p (current_output_insn) == 0;
15787 /* Emit hints only in the case default branch prediction
15788 heuristics would fail. */
15789 if (taken != cputaken)
15791 /* We use 3e (DS) prefix for taken branches and
15792 2e (CS) prefix for not taken branches. */
15794 fputs ("ds ; ", file);
15796 fputs ("cs ; ", file);
/* ';': explicit separator for old gas without rep/lock prefix support.  */
15804 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
/* '@': TLS thread-base segment register (fs in 64-bit user code, gs
   otherwise).  */
15810 if (ASSEMBLER_DIALECT == ASM_ATT)
15813 /* The kernel uses a different segment register for performance
15814 reasons; a system call would not have to trash the userspace
15815 segment register, which would be expensive. */
15816 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15817 fputs ("fs", file);
15819 fputs ("gs", file);
/* '~': 'i' with AVX2, 'f' without.  */
15823 putc (TARGET_AVX2 ? 'i' : 'f', file);
/* '^': addr32 prefix for x32.  */
15827 if (TARGET_64BIT && Pmode != word_mode)
15828 fputs ("addr32 ", file);
/* '!': MPX bnd prefix when the insn needs it.  */
15832 if (ix86_bnd_prefixed_insn_p (current_output_insn))
15833 fputs ("bnd ", file);
15837 output_operand_lossage ("invalid operand code '%c'", code);
/* No special code (or code consumed above): dispatch on the rtx kind.  */
15842 print_reg (x, code, file);
15844 else if (MEM_P (x))
15846 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15847 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15848 && GET_MODE (x) != BLKmode)
15851 switch (GET_MODE_SIZE (GET_MODE (x)))
15853 case 1: size = "BYTE"; break;
15854 case 2: size = "WORD"; break;
15855 case 4: size = "DWORD"; break;
15856 case 8: size = "QWORD"; break;
15857 case 12: size = "TBYTE"; break;
15859 if (GET_MODE (x) == XFmode)
15864 case 32: size = "YMMWORD"; break;
15865 case 64: size = "ZMMWORD"; break;
15867 gcc_unreachable ();
15870 /* Check for explicit size override (codes 'b', 'w', 'k',
15874 else if (code == 'w')
15876 else if (code == 'k')
15878 else if (code == 'q')
15880 else if (code == 'x')
15883 fputs (size, file);
15884 fputs (" PTR ", file);
15888 /* Avoid (%rip) for call operands. */
15889 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15890 && !CONST_INT_P (x))
15891 output_addr_const (file, x);
15892 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15893 output_operand_lossage ("invalid constraints for operand");
15895 output_address (x);
/* SFmode immediate: print as sign-extended hex bit pattern.  */
15898 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15903 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15904 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15906 if (ASSEMBLER_DIALECT == ASM_ATT)
15908 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15910 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15911 (unsigned long long) (int) l);
15913 fprintf (file, "0x%08x", (unsigned int) l);
15916 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15921 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15922 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15924 if (ASSEMBLER_DIALECT == ASM_ATT)
15926 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15929 /* These float cases don't actually occur as immediate operands. */
15930 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15934 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15935 fputs (dstr, file);
15940 /* We have patterns that allow zero sets of memory, for instance.
15941 In 64-bit mode, we should probably support all 8-byte vectors,
15942 since we can in fact encode that into an immediate. */
15943 if (GET_CODE (x) == CONST_VECTOR)
15945 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
/* Plain constants: '$' for immediates / 'OFFSET FLAT:' for symbols,
   unless CODE suppresses the prefix ('P'/'p').  */
15949 if (code != 'P' && code != 'p')
15951 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15953 if (ASSEMBLER_DIALECT == ASM_ATT)
15956 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15957 || GET_CODE (x) == LABEL_REF)
15959 if (ASSEMBLER_DIALECT == ASM_ATT)
15962 fputs ("OFFSET FLAT:", file);
15965 if (CONST_INT_P (x))
15966 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15967 else if (flag_pic || MACHOPIC_INDIRECT)
15968 output_pic_addr_const (file, x, code);
15970 output_addr_const (file, x);
/* TARGET_PRINT_OPERAND_PUNCT_VALID_P hook: true for the punctuation
   print codes ix86_print_operand accepts without an operand
   ('@' '*' '+' '&' ';' '~' '^' '!').  */
15975 ix86_print_operand_punct_valid_p (unsigned char code)
15977 return (code == '@' || code == '*' || code == '+' || code == '&'
15978 || code == ';' || code == '~' || code == '^' || code == '!');
15981 /* Print a memory operand whose address is ADDR. */
/* TARGET_PRINT_OPERAND_ADDRESS hook: print memory address ADDR to FILE
   in either AT&T (disp(base,index,scale)) or Intel ([base+index*scale+
   disp]) syntax.  First unwraps the special UNSPEC address forms
   (VSIB gather/scatter, LEA, MPX bndmk/bndldx), then decomposes the
   address into base/index/disp/scale parts.  NOTE(review): many interior
   lines — including the two dialect-specific printing halves' braces —
   are elided in this extract.  */
15984 ix86_print_operand_address (FILE *file, rtx addr)
15986 struct ix86_address parts;
15987 rtx base, index, disp;
/* VSIB: real address in operand 0, vector index and scale in 1 and 2.  */
15993 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15995 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15996 gcc_assert (parts.index == NULL_RTX);
15997 parts.index = XVECEXP (addr, 0, 1);
15998 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15999 addr = XVECEXP (addr, 0, 0);
16002 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
16004 gcc_assert (TARGET_64BIT);
16005 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
/* MPX bndmk: merge operand 0 in as base, demoting any existing base
   to the index slot.  */
16008 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
16010 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
16011 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
16012 if (parts.base != NULL_RTX)
16014 parts.index = parts.base;
16017 parts.base = XVECEXP (addr, 0, 0);
16018 addr = XVECEXP (addr, 0, 0);
16020 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
16022 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16023 gcc_assert (parts.index == NULL_RTX);
16024 parts.index = XVECEXP (addr, 0, 1);
16025 addr = XVECEXP (addr, 0, 0);
16028 ok = ix86_decompose_address (addr, &parts);
16033 index = parts.index;
16035 scale = parts.scale;
/* Segment override (fs:/gs:), AT&T prefixes with '%'.  */
16043 if (ASSEMBLER_DIALECT == ASM_ATT)
16045 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
16048 gcc_unreachable ();
16051 /* Use one byte shorter RIP relative addressing for 64bit mode. */
16052 if (TARGET_64BIT && !base && !index)
/* Peel "symbol + const" to test the symbol for RIP-relative eligibility
   (labels and non-TLS symbols qualify).  */
16056 if (GET_CODE (disp) == CONST
16057 && GET_CODE (XEXP (disp, 0)) == PLUS
16058 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16059 symbol = XEXP (XEXP (disp, 0), 0);
16061 if (GET_CODE (symbol) == LABEL_REF
16062 || (GET_CODE (symbol) == SYMBOL_REF
16063 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
16066 if (!base && !index)
16068 /* Displacement only requires special attention. */
16070 if (CONST_INT_P (disp))
/* Intel syntax needs an explicit ds: on a bare absolute address.  */
16072 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
16073 fputs ("ds:", file);
16074 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
16077 output_pic_addr_const (file, disp, 0);
16079 output_addr_const (file, disp);
16083 /* Print SImode register names to force addr32 prefix. */
16084 if (SImode_address_operand (addr, VOIDmode))
16086 #ifdef ENABLE_CHECKING
16087 gcc_assert (TARGET_64BIT);
16088 switch (GET_CODE (addr))
16091 gcc_assert (GET_MODE (addr) == SImode);
16092 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
16096 gcc_assert (GET_MODE (addr) == DImode);
16099 gcc_unreachable ();
16102 gcc_assert (!code);
16108 && CONST_INT_P (disp)
16109 && INTVAL (disp) < -16*1024*1024)
16111 /* X32 runs in 64-bit mode, where displacement, DISP, in
16112 address DISP(%r64), is encoded as 32-bit immediate sign-
16113 extended from 32-bit to 64-bit. For -0x40000300(%r64),
16114 address is %r64 + 0xffffffffbffffd00. When %r64 <
16115 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
16116 which is invalid for x32. The correct address is %r64
16117 - 0x40000300 == 0xf7ffdd64. To properly encode
16118 -0x40000300(%r64) for x32, we zero-extend negative
16119 displacement by forcing addr32 prefix which truncates
16120 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
16121 zero-extend all negative displacements, including -1(%rsp).
16122 However, for small negative displacements, sign-extension
16123 won't cause overflow. We only zero-extend negative
16124 displacements if they < -16*1024*1024, which is also used
16125 to check legitimate address displacements for PIC. */
/* AT&T form: disp(base,index,scale).  */
16129 if (ASSEMBLER_DIALECT == ASM_ATT)
16134 output_pic_addr_const (file, disp, 0);
16135 else if (GET_CODE (disp) == LABEL_REF)
16136 output_asm_label (disp);
16138 output_addr_const (file, disp);
16143 print_reg (base, code, file);
16147 print_reg (index, vsib ? 0 : code, file);
16148 if (scale != 1 || vsib)
16149 fprintf (file, ",%d", scale);
/* Intel form: [base+index*scale+disp], symbol printed before brackets.  */
16155 rtx offset = NULL_RTX;
16159 /* Pull out the offset of a symbol; print any symbol itself. */
16160 if (GET_CODE (disp) == CONST
16161 && GET_CODE (XEXP (disp, 0)) == PLUS
16162 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16164 offset = XEXP (XEXP (disp, 0), 1);
16165 disp = gen_rtx_CONST (VOIDmode,
16166 XEXP (XEXP (disp, 0), 0));
16170 output_pic_addr_const (file, disp, 0);
16171 else if (GET_CODE (disp) == LABEL_REF)
16172 output_asm_label (disp);
16173 else if (CONST_INT_P (disp))
16176 output_addr_const (file, disp);
16182 print_reg (base, code, file);
16185 if (INTVAL (offset) >= 0)
16187 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16191 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16198 print_reg (index, vsib ? 0 : code, file);
16199 if (scale != 1 || vsib)
16200 fprintf (file, "*%d", scale);
16207 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
/* Print the backend-specific UNSPEC address constants (TLS relocations,
   Mach-O PIC offsets, split-stack TCB offsets) that the generic
   output_addr_const cannot handle.  Anything that is not a recognized
   UNSPEC is rejected (the non-UNSPEC early-out is visible below; the
   return statements themselves are elided in this listing).  */
16210 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16214 if (GET_CODE (x) != UNSPEC)
/* op is the symbol the relocation applies to.  */
16217 op = XVECEXP (x, 0, 0);
16218 switch (XINT (x, 1))
16220 case UNSPEC_GOTTPOFF:
16221 output_addr_const (file, op);
16222 /* FIXME: This might be @TPOFF in Sun ld. */
16223 fputs ("@gottpoff", file);
16226 output_addr_const (file, op);
16227 fputs ("@tpoff", file);
16229 case UNSPEC_NTPOFF:
16230 output_addr_const (file, op);
/* Two spellings follow; presumably selected by TARGET_64BIT
   (the condition is elided here) — TODO confirm.  */
16232 fputs ("@tpoff", file);
16234 fputs ("@ntpoff", file);
16236 case UNSPEC_DTPOFF:
16237 output_addr_const (file, op);
16238 fputs ("@dtpoff", file);
16240 case UNSPEC_GOTNTPOFF:
16241 output_addr_const (file, op);
/* RIP-relative form in either AT&T or Intel syntax.  */
16243 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16244 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16246 fputs ("@gotntpoff", file);
16248 case UNSPEC_INDNTPOFF:
16249 output_addr_const (file, op);
16250 fputs ("@indntpoff", file);
16253 case UNSPEC_MACHOPIC_OFFSET:
16254 output_addr_const (file, op);
16256 machopic_output_function_base_name (file);
16260 case UNSPEC_STACK_CHECK:
/* -fsplit-stack: emit the thread-pointer-relative stack-limit slot,
   e.g. "%fs:OFFSET" on 64-bit, "%gs:OFFSET" on 32-bit.  */
16264 gcc_assert (flag_split_stack);
16266 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16267 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16269 gcc_unreachable ();
16272 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16283 /* Split one or more double-mode RTL references into pairs of half-mode
16284 references. The RTL can be REG, offsettable MEM, integer constant, or
16285 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16286 split and "num" is its length. lo_half and hi_half are output arrays
16287 that parallel "operands". */
16290 split_double_mode (machine_mode mode, rtx operands[],
16291 int num, rtx lo_half[], rtx hi_half[])
16293 machine_mode half_mode;
/* Select the half mode from MODE (switch elided): presumably
   TImode -> DImode and DImode -> SImode — TODO confirm.  */
16299 half_mode = DImode;
16302 half_mode = SImode;
16305 gcc_unreachable ();
/* Byte offset of the high half within the double-mode object.  */
16308 byte = GET_MODE_SIZE (half_mode);
/* Per-operand loop (header elided); NUM is reused as the index.  */
16312 rtx op = operands[num];
16314 /* simplify_subreg refuse to split volatile memory addresses,
16315 but we still have to handle it. */
16318 lo_half[num] = adjust_address (op, half_mode, 0);
16319 hi_half[num] = adjust_address (op, half_mode, byte);
/* Non-MEM case: take subregs; VOIDmode constants use MODE.  */
16323 lo_half[num] = simplify_gen_subreg (half_mode, op,
16324 GET_MODE (op) == VOIDmode
16325 ? mode : GET_MODE (op), 0);
16326 hi_half[num] = simplify_gen_subreg (half_mode, op,
16327 GET_MODE (op) == VOIDmode
16328 ? mode : GET_MODE (op), byte);
16333 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16334 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16335 is the expression of the binary operation. The output may either be
16336 emitted here, or returned to the caller, like all output_* functions.
16338 There is no guarantee that the operands are the same mode, as they
16339 might be within FLOAT or FLOAT_EXTEND expressions. */
16341 #ifndef SYSV386_COMPAT
16342 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16343 wants to fix the assemblers because that causes incompatibility
16344 with gcc. No-one wants to fix gcc because that causes
16345 incompatibility with assemblers... You can use the option of
16346 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
16347 #define SYSV386_COMPAT 1
16351 output_387_binary_op (rtx insn, rtx *operands)
/* Static buffer: the returned template must outlive this call.  */
16353 static char buf[40];
16356 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16358 #ifdef ENABLE_CHECKING
16359 /* Even if we do not want to check the inputs, this documents input
16360 constraints. Which helps in understanding the following code. */
16361 if (STACK_REG_P (operands[0])
16362 && ((REG_P (operands[1])
16363 && REGNO (operands[0]) == REGNO (operands[1])
16364 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16365 || (REG_P (operands[2])
16366 && REGNO (operands[0]) == REGNO (operands[2])
16367 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16368 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
/* Neither x87 form matched: must be an SSE operation.  */
16371 gcc_assert (is_sse);
/* Choose the mnemonic family from the rtx code of the operation;
   each case rejects integer-mode operands for the commutative check
   (the mnemonic assignments themselves are elided in this listing).  */
16374 switch (GET_CODE (operands[3]))
16377 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16378 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16386 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16387 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16395 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16396 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16404 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16405 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16413 gcc_unreachable ();
/* SSE path: ssep is the scalar SSE mnemonic prefix (set in the elided
   switch above).  The first form is presumably the 3-operand AVX
   variant, the second the 2-operand legacy-SSE variant — TODO confirm
   the guarding condition.  */
16420 strcpy (buf, ssep);
16421 if (GET_MODE (operands[0]) == SFmode)
16422 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16424 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16428 strcpy (buf, ssep + 1);
16429 if (GET_MODE (operands[0]) == SFmode)
16430 strcat (buf, "ss\t{%2, %0|%0, %2}");
16432 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: pick the operand-order/popping suffix P for the template.  */
16438 switch (GET_CODE (operands[3]))
16442 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
/* Commutative op: canonicalize so operands[0] == operands[1].  */
16443 std::swap (operands[1], operands[2]);
16445 /* know operands[0] == operands[1]. */
16447 if (MEM_P (operands[2]))
16453 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16455 if (STACK_TOP_P (operands[0]))
16456 /* How is it that we are storing to a dead operand[2]?
16457 Well, presumably operands[1] is dead too. We can't
16458 store the result to st(0) as st(0) gets popped on this
16459 instruction. Instead store to operands[2] (which I
16460 think has to be st(1)). st(1) will be popped later.
16461 gcc <= 2.8.1 didn't have this check and generated
16462 assembly code that the Unixware assembler rejected. */
16463 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16465 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16469 if (STACK_TOP_P (operands[0]))
16470 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16472 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS/DIV): memory operand cases.  */
16477 if (MEM_P (operands[1]))
16483 if (MEM_P (operands[2]))
16489 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16492 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16493 derived assemblers, confusingly reverse the direction of
16494 the operation for fsub{r} and fdiv{r} when the
16495 destination register is not st(0). The Intel assembler
16496 doesn't have this brain damage. Read !SYSV386_COMPAT to
16497 figure out what the hardware really does. */
16498 if (STACK_TOP_P (operands[0]))
16499 p = "{p\t%0, %2|rp\t%2, %0}";
16501 p = "{rp\t%2, %0|p\t%0, %2}";
16503 if (STACK_TOP_P (operands[0]))
16504 /* As above for fmul/fadd, we can't store to st(0). */
16505 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16507 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16512 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16515 if (STACK_TOP_P (operands[0]))
16516 p = "{rp\t%0, %1|p\t%1, %0}";
16518 p = "{p\t%1, %0|rp\t%0, %1}";
16520 if (STACK_TOP_P (operands[0]))
16521 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16523 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16528 if (STACK_TOP_P (operands[0]))
16530 if (STACK_TOP_P (operands[1]))
16531 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16533 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16536 else if (STACK_TOP_P (operands[1]))
16539 p = "{\t%1, %0|r\t%0, %1}";
16541 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16547 p = "{r\t%2, %0|\t%0, %2}";
16549 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16555 gcc_unreachable ();
16562 /* Check if a 256bit AVX register is referenced inside of EXP. */
/* Looks through one SUBREG layer, then tests for a hard/pseudo REG
   whose mode is a 256-bit AVX (or OImode) mode.  */
16565 ix86_check_avx256_register (const_rtx exp)
16567 if (GET_CODE (exp) == SUBREG)
16568 exp = SUBREG_REG (exp);
16570 return (REG_P (exp)
16571 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
16574 /* Return needed mode for entity in optimize_mode_switching pass. */
/* Decide the AVX upper-128 state INSN requires: DIRTY if it references
   any 256-bit AVX register, CLEAN across calls that pass no 256-bit
   arguments, ANY otherwise.  */
16577 ix86_avx_u128_mode_needed (rtx_insn *insn)
/* Call handling (the CALL_P guard is elided in this listing —
   NOTE(review): confirm).  Walk the USEs recorded for the call's
   argument registers.  */
16583 /* Needed mode is set to AVX_U128_CLEAN if there are
16584 no 256bit modes used in function arguments. */
16585 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16587 link = XEXP (link, 1))
16589 if (GET_CODE (XEXP (link, 0)) == USE)
16591 rtx arg = XEXP (XEXP (link, 0), 0);
16593 if (ix86_check_avx256_register (arg))
16594 return AVX_U128_DIRTY;
16598 return AVX_U128_CLEAN;
16601 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16602 changes state only when a 256bit register is written to, but we need
16603 to prevent the compiler from moving optimal insertion point above
16604 eventual read from 256bit register. */
16605 subrtx_iterator::array_type array;
16606 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16607 if (ix86_check_avx256_register (*iter))
16608 return AVX_U128_DIRTY;
16610 return AVX_U128_ANY;
16613 /* Return mode that i387 must be switched into
16614 prior to the execution of insn. */
/* Map INSN's i387_cw attribute to the control-word mode the
   mode-switching pass must establish before it; calls and asms
   invalidate the control word entirely.  */
16617 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16619 enum attr_i387_cw mode;
16621 /* The mode UNINITIALIZED is used to store control word after a
16622 function call or ASM pattern. The mode ANY specify that function
16623 has no requirements on the control word and make no changes in the
16624 bits we are interested in. */
/* First disjunct of this condition (presumably CALL_P (insn)) is
   elided in this listing — NOTE(review): confirm.  */
16627 || (NONJUMP_INSN_P (insn)
16628 && (asm_noperands (PATTERN (insn)) >= 0
16629 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16630 return I387_CW_UNINITIALIZED;
16632 if (recog_memoized (insn) < 0)
16633 return I387_CW_ANY;
16635 mode = get_attr_i387_cw (insn);
/* The per-ENTITY filtering between these checks is elided; each
   returns MODE only when ENTITY matches that rounding-mode entity.  */
16640 if (mode == I387_CW_TRUNC)
16645 if (mode == I387_CW_FLOOR)
16650 if (mode == I387_CW_CEIL)
16655 if (mode == I387_CW_MASK_PM)
16660 gcc_unreachable ();
16663 return I387_CW_ANY;
16666 /* Return mode that entity must be switched into
16667 prior to the execution of insn. */
/* TARGET_MODE_NEEDED hook: dispatch on ENTITY to the AVX-U128 or
   i387 control-word helper (the case labels are elided here).  */
16670 ix86_mode_needed (int entity, rtx_insn *insn)
16675 return ix86_avx_u128_mode_needed (insn);
16680 return ix86_i387_mode_needed (entity, insn);
16682 gcc_unreachable ();
16687 /* Check if a 256bit AVX register is referenced in stores. */
/* note_stores callback: sets *(bool *)DATA when DEST is a 256-bit
   AVX register (the assignment line is elided in this listing).  */
16690 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16692 if (ix86_check_avx256_register (dest))
16694 bool *used = (bool *) data;
16699 /* Calculate mode of upper 128bit AVX registers after the insn. */
16702 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16704 rtx pat = PATTERN (insn);
/* vzeroupper/vzeroall leave the upper halves clean.  */
16706 if (vzeroupper_operation (pat, VOIDmode)
16707 || vzeroall_operation (pat, VOIDmode))
16708 return AVX_U128_CLEAN;
16710 /* We know that state is clean after CALL insn if there are no
16711 256bit registers used in the function return register. */
/* (The CALL_P guard for this branch is elided — NOTE(review).)  */
16714 bool avx_reg256_found = false;
16715 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16717 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16720 /* Otherwise, return current mode. Remember that if insn
16721 references AVX 256bit registers, the mode was already changed
16722 to DIRTY from MODE_NEEDED. */
16726 /* Return the mode that an insn results in. */
/* TARGET_MODE_AFTER hook; dispatches on ENTITY (case labels elided).
   For non-AVX entities it presumably returns MODE unchanged —
   TODO confirm against the elided lines.  */
16729 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16734 return ix86_avx_u128_mode_after (mode, insn);
16741 gcc_unreachable ();
/* AVX upper-128 state assumed on function entry: DIRTY if any incoming
   argument lives in a 256-bit AVX register, CLEAN otherwise.  */
16746 ix86_avx_u128_mode_entry (void)
16750 /* Entry mode is set to AVX_U128_DIRTY if there are
16751 256bit modes used in function arguments. */
16752 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16753 arg = TREE_CHAIN (arg))
16755 rtx incoming = DECL_INCOMING_RTL (arg);
16757 if (incoming && ix86_check_avx256_register (incoming))
16758 return AVX_U128_DIRTY;
16761 return AVX_U128_CLEAN;
16764 /* Return a mode that ENTITY is assumed to be
16765 switched to at function entry. */
/* TARGET_MODE_ENTRY hook; case labels elided — i387 entities get
   I387_CW_ANY.  */
16768 ix86_mode_entry (int entity)
16773 return ix86_avx_u128_mode_entry ();
16778 return I387_CW_ANY;
16780 gcc_unreachable ();
/* AVX upper-128 state assumed on function exit: DIRTY if the return
   value occupies a 256-bit AVX register, CLEAN otherwise.  */
16785 ix86_avx_u128_mode_exit (void)
16787 rtx reg = crtl->return_rtx;
16789 /* Exit mode is set to AVX_U128_DIRTY if there are
16790 256bit modes used in the function return register. */
16791 if (reg && ix86_check_avx256_register (reg))
16792 return AVX_U128_DIRTY;
16794 return AVX_U128_CLEAN;
16797 /* Return a mode that ENTITY is assumed to be
16798 switched to at function exit. */
/* TARGET_MODE_EXIT hook; case labels elided — i387 entities get
   I387_CW_ANY.  */
16801 ix86_mode_exit (int entity)
16806 return ix86_avx_u128_mode_exit ();
16811 return I387_CW_ANY;
16813 gcc_unreachable ();
/* TARGET_MODE_PRIORITY hook (body elided in this listing; presumably
   just returns N — TODO confirm).  */
16818 ix86_mode_priority (int, int n)
16823 /* Output code to initialize control word copies used by trunc?f?i and
16824 rounding patterns. CURRENT_MODE is set to current control word,
16825 while NEW_MODE is set to new control word. */
/* Emits fnstcw of the live control word into a stack slot, edits the
   rounding-control / precision-mask bits in a scratch HImode register,
   and stores the edited word into the per-rounding-mode stack slot
   that fldcw later loads.  */
16828 emit_i387_cw_initialization (int mode)
16830 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16833 enum ix86_stack_slot slot;
16835 rtx reg = gen_reg_rtx (HImode);
/* Capture the current control word and copy it to REG.  */
16837 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16838 emit_move_insn (reg, copy_rtx (stored_mode));
/* Slow path for targets where HImode bit-insert is unattractive:
   use and/or on the full HImode value.  RC field is bits 10-11
   (mask 0x0c00); PM is bit 5 (0x0020).  */
16840 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16841 || optimize_insn_for_size_p ())
16845 case I387_CW_TRUNC:
16846 /* round toward zero (truncate) */
16847 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16848 slot = SLOT_CW_TRUNC;
16851 case I387_CW_FLOOR:
16852 /* round down toward -oo */
16853 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16854 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16855 slot = SLOT_CW_FLOOR;
16859 /* round up toward +oo */
16860 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16861 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16862 slot = SLOT_CW_CEIL;
16865 case I387_CW_MASK_PM:
16866 /* mask precision exception for nearbyint() */
16867 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16868 slot = SLOT_CW_MASK_PM;
16872 gcc_unreachable ();
/* Fast path: insert the 2-bit RC value directly with movsi_insv_1.  */
16879 case I387_CW_TRUNC:
16880 /* round toward zero (truncate) */
16881 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16882 slot = SLOT_CW_TRUNC;
16885 case I387_CW_FLOOR:
16886 /* round down toward -oo */
16887 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16888 slot = SLOT_CW_FLOOR;
16892 /* round up toward +oo */
16893 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16894 slot = SLOT_CW_CEIL;
16897 case I387_CW_MASK_PM:
16898 /* mask precision exception for nearbyint() */
16899 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16900 slot = SLOT_CW_MASK_PM;
16904 gcc_unreachable ();
16908 gcc_assert (slot < MAX_386_STACK_LOCALS);
/* Persist the edited control word for the fldcw in the pattern.  */
16910 new_mode = assign_386_stack_local (HImode, slot);
16911 emit_move_insn (new_mode, reg);
16914 /* Emit vzeroupper. */
/* Emit a vzeroupper unless a call-saved SSE register is live at the
   insertion point (vzeroupper would clobber its upper half).  */
16917 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16921 /* Cancel automatic vzeroupper insertion if there are
16922 live call-saved SSE registers at the insertion point. */
16924 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16925 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
/* (early return elided)  */
16929 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16930 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16933 emit_insn (gen_avx_vzeroupper ());
16936 /* Generate one or more insns to set ENTITY to MODE. */
16938 /* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
16939 is the set of hard registers live at the point where the insn(s)
16940 are to be inserted. */
/* TARGET_MODE_EMIT hook (case labels elided): vzeroupper for the AVX
   entity when switching to CLEAN; control-word stores for the i387
   rounding-mode entities.  */
16943 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16944 HARD_REG_SET regs_live)
16949 if (mode == AVX_U128_CLEAN)
16950 ix86_avx_emit_vzeroupper (regs_live);
16956 if (mode != I387_CW_ANY
16957 && mode != I387_CW_UNINITIALIZED)
16958 emit_i387_cw_initialization (mode);
16961 gcc_unreachable ();
16965 /* Output code for INSN to convert a float to a signed int. OPERANDS
16966 are the insn operands. The output may be [HSD]Imode and the input
16967 operand may be [SDX]Fmode. */
16970 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
/* Whether st(0) dies in this insn; decides fistp vs fist below.  */
16972 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16973 int dimode_p = GET_MODE (operands[0]) == DImode;
16974 int round_mode = get_attr_i387_cw (insn);
16976 /* Jump through a hoop or two for DImode, since the hardware has no
16977 non-popping instruction. We used to do this a different way, but
16978 that was somewhat fragile and broke with post-reload splitters. */
/* Duplicate st(0) so the popping fistp leaves the value available.  */
16979 if ((dimode_p || fisttp) && !stack_top_dies)
16980 output_asm_insn ("fld\t%y1", operands);
16982 gcc_assert (STACK_TOP_P (operands[1]));
16983 gcc_assert (MEM_P (operands[0]));
16984 gcc_assert (GET_MODE (operands[1]) != TFmode);
/* SSE3 fisttp always truncates; no control-word dance needed.  */
16987 output_asm_insn ("fisttp%Z0\t%0", operands);
/* Classic path: temporarily switch rounding via fldcw around the
   store, then restore the saved control word.  */
16990 if (round_mode != I387_CW_ANY)
16991 output_asm_insn ("fldcw\t%3", operands);
16992 if (stack_top_dies || dimode_p)
16993 output_asm_insn ("fistp%Z0\t%0", operands);
16995 output_asm_insn ("fist%Z0\t%0", operands);
16996 if (round_mode != I387_CW_ANY)
16997 output_asm_insn ("fldcw\t%2", operands);
17003 /* Output code for x87 ffreep insn. The OPNO argument, which may only
17004 have the values zero or one, indicates the ffreep insn's operand
17005 from the OPERANDS array. */
17007 static const char *
17008 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
17010 if (TARGET_USE_FFREEP)
17011 #ifdef HAVE_AS_IX86_FFREEP
17012 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler lacks ffreep: emit its raw encoding (0xdf 0xc0+i) via
   .short; note the operand bytes are printed in listing order.  */
17015 static char retval[32];
17016 int regno = REGNO (operands[opno]);
17018 gcc_assert (STACK_REGNO_P (regno));
17020 regno -= FIRST_STACK_REG;
17022 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
/* Fallback when ffreep is not beneficial: a plain popping fstp.  */
17027 return opno ? "fstp\t%y1" : "fstp\t%y0";
17031 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
17032 should be used. UNORDERED_P is true when fucom should be used. */
17035 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
17037 int stack_top_dies;
17038 rtx cmp_op0, cmp_op1;
17039 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
/* Select the pair being compared; which operand slots hold it depends
   on the insn form (the guarding condition is elided here).  */
17043 cmp_op0 = operands[0];
17044 cmp_op1 = operands[1];
17048 cmp_op0 = operands[1];
17049 cmp_op1 = operands[2];
/* SSE comparisons: [v]ucomis[sd] / [v]comis[sd] set EFLAGS directly.  */
17054 if (GET_MODE (operands[0]) == SFmode)
17056 return "%vucomiss\t{%1, %0|%0, %1}";
17058 return "%vcomiss\t{%1, %0|%0, %1}";
17061 return "%vucomisd\t{%1, %0|%0, %1}";
17063 return "%vcomisd\t{%1, %0|%0, %1}";
/* x87 path: the first compare operand must be st(0).  */
17066 gcc_assert (STACK_TOP_P (cmp_op0));
17068 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero: ftst, popping st(0) afterwards if it dies.  */
17070 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
17072 if (stack_top_dies)
17074 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
17075 return output_387_ffreep (operands, 1);
17078 return "ftst\n\tfnstsw\t%0";
17081 if (STACK_REG_P (cmp_op1)
17083 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
17084 && REGNO (cmp_op1) != FIRST_STACK_REG)
17086 /* If both the top of the 387 stack dies, and the other operand
17087 is also a stack register that dies, then this must be a
17088 `fcompp' float compare */
17092 /* There is no double popping fcomi variant. Fortunately,
17093 eflags is immune from the fstp's cc clobbering. */
17095 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
17097 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
17098 return output_387_ffreep (operands, 0);
17103 return "fucompp\n\tfnstsw\t%0";
17105 return "fcompp\n\tfnstsw\t%0";
17110 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
17112 static const char * const alt[16] =
17114 "fcom%Z2\t%y2\n\tfnstsw\t%0",
17115 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
17116 "fucom%Z2\t%y2\n\tfnstsw\t%0",
17117 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
17119 "ficom%Z2\t%y2\n\tfnstsw\t%0",
17120 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
17124 "fcomi\t{%y1, %0|%0, %y1}",
17125 "fcomip\t{%y1, %0|%0, %y1}",
17126 "fucomi\t{%y1, %0|%0, %y1}",
17127 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index into ALT per the encoding comment above.  */
17138 mask = eflags_p << 3;
17139 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17140 mask |= unordered_p << 1;
17141 mask |= stack_top_dies;
17143 gcc_assert (mask < 16);
/* Output one absolute jump-table entry: .long (or .quad for 64-bit
   DImode case vectors) of the local label L<value>.  */
17152 ix86_output_addr_vec_elt (FILE *file, int value)
17154 const char *directive = ASM_LONG;
17158 directive = ASM_QUAD;
17160 gcc_assert (!TARGET_64BIT);
17163 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Output one relative jump-table entry: label L<value> expressed
   relative to L<rel>, @GOTOFF, a Mach-O function base, or the GOT
   symbol, depending on target/PIC configuration.  */
17167 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17169 const char *directive = ASM_LONG;
17172 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17173 directive = ASM_QUAD;
17175 gcc_assert (!TARGET_64BIT);
17177 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
17178 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17179 fprintf (file, "%s%s%d-%s%d\n",
17180 directive, LPREFIX, value, LPREFIX, rel);
17181 else if (HAVE_AS_GOTOFF_IN_DATA)
17182 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17184 else if (TARGET_MACHO)
17186 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17187 machopic_output_function_base_name (file);
/* Fallback: express the entry relative to the GOT base symbol.  */
17192 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17193 GOT_SYMBOL_NAME, LPREFIX, value);
17196 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* Zero DEST post-reload; prefers xor (which clobbers flags, hence the
   CLOBBER added below) unless mov $0 is profitable.  */
17200 ix86_expand_clear (rtx dest)
17204 /* We play register width games, which are only valid after reload. */
17205 gcc_assert (reload_completed);
17207 /* Avoid HImode and its attendant prefix byte. */
17208 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17209 dest = gen_rtx_REG (SImode, REGNO (dest));
17210 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx)
17212 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
/* xor form: record that EFLAGS is clobbered.  */
17214 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17215 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17221 /* X is an unchanging MEM. If it is a constant pool reference, return
17222 the constant pool rtx, else NULL. */
17225 maybe_get_pool_constant (rtx x)
/* Strip PIC/GOT wrapping from the address before testing it.  */
17227 x = ix86_delegitimize_address (XEXP (x, 0));
17229 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17230 return get_pool_constant (x);
/* Expand a scalar move operands[0] := operands[1] in MODE, legitimizing
   TLS symbols, PE-COFF dllimports, PIC references and awkward
   constants along the way.  */
17236 ix86_expand_move (machine_mode mode, rtx operands[])
17239 enum tls_model model;
/* op0/op1 assignments from operands[] are elided in this listing.  */
17244 if (GET_CODE (op1) == SYMBOL_REF)
17248 model = SYMBOL_REF_TLS_MODEL (op1);
/* TLS symbol: rewrite through the model-specific access sequence.  */
17251 op1 = legitimize_tls_address (op1, model, true);
17252 op1 = force_operand (op1, op0);
17255 op1 = convert_to_mode (mode, op1, 1);
17257 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
/* CONST (PLUS symbol offset): legitimize the symbol, then re-add
   the addend.  */
17260 else if (GET_CODE (op1) == CONST
17261 && GET_CODE (XEXP (op1, 0)) == PLUS
17262 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17264 rtx addend = XEXP (XEXP (op1, 0), 1);
17265 rtx symbol = XEXP (XEXP (op1, 0), 0);
17268 model = SYMBOL_REF_TLS_MODEL (symbol);
17270 tmp = legitimize_tls_address (symbol, model, true);
17272 tmp = legitimize_pe_coff_symbol (symbol, true);
17276 tmp = force_operand (tmp, NULL);
17277 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17278 op0, 1, OPTAB_DIRECT);
17281 op1 = convert_to_mode (mode, tmp, 1);
/* PIC / Mach-O indirect addressing for symbolic operands.  */
17285 if ((flag_pic || MACHOPIC_INDIRECT)
17286 && symbolic_operand (op1, mode))
17288 if (TARGET_MACHO && !TARGET_64BIT)
17291 /* dynamic-no-pic */
17292 if (MACHOPIC_INDIRECT)
17294 rtx temp = ((reload_in_progress
17295 || ((op0 && REG_P (op0))
17297 ? op0 : gen_reg_rtx (Pmode));
17298 op1 = machopic_indirect_data_reference (op1, temp);
17300 op1 = machopic_legitimize_pic_address (op1, mode,
17301 temp == op1 ? 0 : temp);
17303 if (op0 != op1 && GET_CODE (op0) != MEM)
17305 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
/* Storing a symbolic value to memory needs it in a register first.  */
17309 if (GET_CODE (op0) == MEM)
17310 op1 = force_reg (Pmode, op1);
17314 if (GET_CODE (temp) != REG)
17315 temp = gen_reg_rtx (Pmode);
17316 temp = legitimize_pic_address (op1, temp);
17321 /* dynamic-no-pic */
17327 op1 = force_reg (mode, op1);
17328 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17330 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17331 op1 = legitimize_pic_address (op1, reg);
17334 op1 = convert_to_mode (mode, op1, 1)
/* Pushes that don't round to the mode size can't take the operand
   directly (condition partially elided here).  */
17341 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17342 || !push_operand (op0, mode))
17344 op1 = force_reg (mode, op1);
17346 if (push_operand (op0, mode)
17347 && ! general_no_elim_operand (op1, mode))
17348 op1 = copy_to_mode_reg (mode, op1);
17350 /* Force large constants in 64bit compilation into register
17351 to get them CSEed. */
17352 if (can_create_pseudo_p ()
17353 && (mode == DImode) && TARGET_64BIT
17354 && immediate_operand (op1, mode)
17355 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17356 && !register_operand (op0, mode)
17358 op1 = copy_to_mode_reg (mode, op1);
17360 if (can_create_pseudo_p ()
17361 && FLOAT_MODE_P (mode)
17362 && GET_CODE (op1) == CONST_DOUBLE)
17364 /* If we are loading a floating point constant to a register,
17365 force the value to memory now, since we'll get better code
17366 out the back end. */
17368 op1 = validize_mem (force_const_mem (mode, op1));
17369 if (!register_operand (op0, mode))
/* Memory destination: stage the constant through a register.  */
17371 rtx temp = gen_reg_rtx (mode);
17372 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
17373 emit_move_insn (op0, temp);
17379 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector move operands[0] := operands[1] in MODE: resolve
   pushes, force non-trivial constants into the constant pool, and
   route under-aligned SSE operands through the misaligned-move path.  */
17383 ix86_expand_vector_move (machine_mode mode, rtx operands[])
17385 rtx op0 = operands[0], op1 = operands[1];
17386 unsigned int align = GET_MODE_ALIGNMENT (mode);
17388 if (push_operand (op0, VOIDmode))
17389 op0 = emit_move_resolve_push (mode, op0);
17391 /* Force constants other than zero into memory. We do not know how
17392 the instructions used to build constants modify the upper 64 bits
17393 of the register, once we have that information we may be able
17394 to handle some of them more efficiently. */
17395 if (can_create_pseudo_p ()
17396 && register_operand (op0, mode)
17397 && (CONSTANT_P (op1)
17398 || (GET_CODE (op1) == SUBREG
17399 && CONSTANT_P (SUBREG_REG (op1))))
17400 && !standard_sse_constant_p (op1))
17401 op1 = validize_mem (force_const_mem (mode, op1));
17403 /* We need to check memory alignment for SSE mode since attribute
17404 can make operands unaligned. */
17405 if (can_create_pseudo_p ()
17406 && SSE_REG_MODE_P (mode)
17407 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17408 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17412 /* ix86_expand_vector_move_misalign() does not like constants ... */
17413 if (CONSTANT_P (op1)
17414 || (GET_CODE (op1) == SUBREG
17415 && CONSTANT_P (SUBREG_REG (op1))))
17416 op1 = validize_mem (force_const_mem (mode, op1));
17418 /* ... nor both arguments in memory. */
17419 if (!register_operand (op0, mode)
17420 && !register_operand (op1, mode))
17421 op1 = force_reg (mode, op1);
17423 tmp[0] = op0; tmp[1] = op1;
17424 ix86_expand_vector_move_misalign (mode, tmp);
17428 /* Make operand1 a register if it isn't already. */
17429 if (can_create_pseudo_p ()
17430 && !register_operand (op0, mode)
17431 && !register_operand (op1, mode))
/* mem-to-mem is not encodable; stage through a register.  */
17433 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17437 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17440 /* Split 32-byte AVX unaligned load and store if needed. */
/* OP0 := OP1 for a 32-byte AVX mode where one side is unaligned
   memory: either split into two 16-byte halves (when the tuning says
   split is faster) or use the full-width unaligned load/store insn.  */
17443 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17446 rtx (*extract) (rtx, rtx, rtx);
17447 rtx (*load_unaligned) (rtx, rtx);
17448 rtx (*store_unaligned) (rtx, rtx);
/* Pick the per-element-type insn generators.  MODE here is presumably
   the 16-byte half mode set alongside each case (assignments to it
   are elided in this listing) — TODO confirm.  */
17451 switch (GET_MODE (op0))
17454 gcc_unreachable ();
17456 extract = gen_avx_vextractf128v32qi;
17457 load_unaligned = gen_avx_loaddquv32qi;
17458 store_unaligned = gen_avx_storedquv32qi;
17462 extract = gen_avx_vextractf128v8sf;
17463 load_unaligned = gen_avx_loadups256;
17464 store_unaligned = gen_avx_storeups256;
17468 extract = gen_avx_vextractf128v4df;
17469 load_unaligned = gen_avx_loadupd256;
17470 store_unaligned = gen_avx_storeupd256;
/* Load case (MEM_P (op1) guard elided).  */
17477 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
17478 && optimize_insn_for_speed_p ())
/* Split load: two 16-byte loads concatenated via VEC_CONCAT.  */
17480 rtx r = gen_reg_rtx (mode);
17481 m = adjust_address (op1, mode, 0);
17482 emit_move_insn (r, m);
17483 m = adjust_address (op1, mode, 16);
17484 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17485 emit_move_insn (op0, r);
17487 /* Normal *mov<mode>_internal pattern will handle
17488 unaligned loads just fine if misaligned_operand
17489 is true, and without the UNSPEC it can be combined
17490 with arithmetic instructions. */
17491 else if (misaligned_operand (op1, GET_MODE (op1)))
17492 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17494 emit_insn (load_unaligned (op0, op1));
17496 else if (MEM_P (op0))
17498 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
17499 && optimize_insn_for_speed_p ())
/* Split store: vextractf128 of each 16-byte half.  */
17501 m = adjust_address (op0, mode, 0);
17502 emit_insn (extract (m, op1, const0_rtx));
17503 m = adjust_address (op0, mode, 16);
17504 emit_insn (extract (m, op1, const1_rtx));
17507 emit_insn (store_unaligned (op0, op1));
17510 gcc_unreachable ();
17513 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17514 straight to ix86_expand_vector_move. */
17515 /* Code generation for scalar reg-reg moves of single and double precision data:
17516 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17520 if (x86_sse_partial_reg_dependency == true)
17525 Code generation for scalar loads of double precision data:
17526 if (x86_sse_split_regs == true)
17527 movlpd mem, reg (gas syntax)
17531 Code generation for unaligned packed loads of single precision data
17532 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17533 if (x86_sse_unaligned_move_optimal)
17536 if (x86_sse_partial_reg_dependency == true)
17548 Code generation for unaligned packed loads of double precision data
17549 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17550 if (x86_sse_unaligned_move_optimal)
17553 if (x86_sse_split_regs == true)
/* Expand a misaligned vector move OPERANDS[0] = OPERANDS[1] in MODE,
   selecting the unaligned load/store strategy by vector width
   (64-byte AVX-512, 32-byte AVX, 16-byte SSE) and target tuning flags.
   NOTE(review): this listing is sampled -- interior lines are missing,
   so comments below describe only the visible fragments.  */
17566 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17568 rtx op0, op1, orig_op0 = NULL_RTX, m;
17569 rtx (*load_unaligned) (rtx, rtx);
17570 rtx (*store_unaligned) (rtx, rtx);
/* 64-byte (512-bit) modes: pick the AVX-512 unaligned dqu/ups/upd
   load and store generators, then emit exactly one of them.  */
17575 if (GET_MODE_SIZE (mode) == 64)
17577 switch (GET_MODE_CLASS (mode))
17579 case MODE_VECTOR_INT:
17581 if (GET_MODE (op0) != V16SImode)
17586 op0 = gen_reg_rtx (V16SImode);
17589 op0 = gen_lowpart (V16SImode, op0);
17591 op1 = gen_lowpart (V16SImode, op1);
17594 case MODE_VECTOR_FLOAT:
17595 switch (GET_MODE (op0))
17598 gcc_unreachable ();
17600 load_unaligned = gen_avx512f_loaddquv16si;
17601 store_unaligned = gen_avx512f_storedquv16si;
17604 load_unaligned = gen_avx512f_loadups512;
17605 store_unaligned = gen_avx512f_storeups512;
17608 load_unaligned = gen_avx512f_loadupd512;
17609 store_unaligned = gen_avx512f_storeupd512;
17614 emit_insn (load_unaligned (op0, op1));
17615 else if (MEM_P (op0))
17616 emit_insn (store_unaligned (op0, op1));
17618 gcc_unreachable ();
17620 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17624 gcc_unreachable ();
/* 32-byte (256-bit AVX) modes: integer vectors go through V32QImode;
   float vectors are handled by the 128-bit-halves splitter.  */
17631 && GET_MODE_SIZE (mode) == 32)
17633 switch (GET_MODE_CLASS (mode))
17635 case MODE_VECTOR_INT:
17637 if (GET_MODE (op0) != V32QImode)
17642 op0 = gen_reg_rtx (V32QImode);
17645 op0 = gen_lowpart (V32QImode, op0);
17647 op1 = gen_lowpart (V32QImode, op1);
17650 case MODE_VECTOR_FLOAT:
17651 ix86_avx256_split_vector_move_misalign (op0, op1);
17653 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17657 gcc_unreachable ();
17665 /* Normal *mov<mode>_internal pattern will handle
17666 unaligned loads just fine if misaligned_operand
17667 is true, and without the UNSPEC it can be combined
17668 with arithmetic instructions. */
17670 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17671 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17672 && misaligned_operand (op1, GET_MODE (op1)))
17673 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17674 /* ??? If we have typed data, then it would appear that using
17675 movdqu is the only way to get unaligned data loaded with
17677 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17679 if (GET_MODE (op0) != V16QImode)
17682 op0 = gen_reg_rtx (V16QImode);
17684 op1 = gen_lowpart (V16QImode, op1);
17685 /* We will eventually emit movups based on insn attributes. */
17686 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17688 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
/* SSE2 V2DF load: either one unaligned movupd-style load, or a
   lo/hi pair of 8-byte loads, depending on tuning.  */
17690 else if (TARGET_SSE2 && mode == V2DFmode)
17695 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17696 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17697 || optimize_insn_for_size_p ())
17699 /* We will eventually emit movups based on insn attributes. */
17700 emit_insn (gen_sse2_loadupd (op0, op1));
17704 /* When SSE registers are split into halves, we can avoid
17705 writing to the top half twice. */
17706 if (TARGET_SSE_SPLIT_REGS)
17708 emit_clobber (op0);
17713 /* ??? Not sure about the best option for the Intel chips.
17714 The following would seem to satisfy; the register is
17715 entirely cleared, breaking the dependency chain. We
17716 then store to the upper half, with a dependency depth
17717 of one. A rumor has it that Intel recommends two movsd
17718 followed by an unpacklpd, but this is unconfirmed. And
17719 given that the dependency depth of the unpacklpd would
17720 still be one, I'm not sure why this would be better. */
17721 zero = CONST0_RTX (V2DFmode);
17724 m = adjust_address (op1, DFmode, 0);
17725 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17726 m = adjust_address (op1, DFmode, 8);
17727 emit_insn (gen_sse2_loadhpd (op0, op0, m));
/* V4SF (and other 16-byte) loads: whole-vector movups when the
   target tolerates it, else a loadlps/loadhps half-pair.  */
17734 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17735 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17736 || optimize_insn_for_size_p ())
17738 if (GET_MODE (op0) != V4SFmode)
17741 op0 = gen_reg_rtx (V4SFmode);
17743 op1 = gen_lowpart (V4SFmode, op1);
17744 emit_insn (gen_sse_loadups (op0, op1));
17746 emit_move_insn (orig_op0,
17747 gen_lowpart (GET_MODE (orig_op0), op0));
17751 if (mode != V4SFmode)
17752 t = gen_reg_rtx (V4SFmode);
/* Clearing T first breaks the false dependency on the old value
   of the partial register on tunings that care about it.  */
17756 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17757 emit_move_insn (t, CONST0_RTX (V4SFmode));
17761 m = adjust_address (op1, V2SFmode, 0);
17762 emit_insn (gen_sse_loadlps (t, t, m));
17763 m = adjust_address (op1, V2SFmode, 8);
17764 emit_insn (gen_sse_loadhps (t, t, m));
17765 if (mode != V4SFmode)
17766 emit_move_insn (op0, gen_lowpart (mode, t));
/* Store side (destination is memory): mirror of the load cases.  */
17769 else if (MEM_P (op0))
17771 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17773 op0 = gen_lowpart (V16QImode, op0);
17774 op1 = gen_lowpart (V16QImode, op1);
17775 /* We will eventually emit movups based on insn attributes. */
17776 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17778 else if (TARGET_SSE2 && mode == V2DFmode)
17781 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17782 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17783 || optimize_insn_for_size_p ())
17784 /* We will eventually emit movups based on insn attributes. */
17785 emit_insn (gen_sse2_storeupd (op0, op1));
17788 m = adjust_address (op0, DFmode, 0);
17789 emit_insn (gen_sse2_storelpd (m, op1));
17790 m = adjust_address (op0, DFmode, 8);
17791 emit_insn (gen_sse2_storehpd (m, op1));
17796 if (mode != V4SFmode)
17797 op1 = gen_lowpart (V4SFmode, op1);
17800 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17801 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17802 || optimize_insn_for_size_p ())
17804 op0 = gen_lowpart (V4SFmode, op0);
17805 emit_insn (gen_sse_storeups (op0, op1));
17809 m = adjust_address (op0, V2SFmode, 0);
17810 emit_insn (gen_sse_storelps (m, op1));
17811 m = adjust_address (op0, V2SFmode, 8);
17812 emit_insn (gen_sse_storehps (m, op1));
17817 gcc_unreachable ();
17820 /* Helper function of ix86_fixup_binary_operands to canonicalize
17821 operand order. Returns true if the operands should be swapped. */
/* CODE is the rtx_code of the (commutative) operation, MODE its mode,
   OPERANDS is the usual {dst, src1, src2} triple.  Priority order of
   the canonicalization rules is spelled out in the comments below.  */
17824 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
17827 rtx dst = operands[0];
17828 rtx src1 = operands[1];
17829 rtx src2 = operands[2];
17831 /* If the operation is not commutative, we can't do anything. */
17832 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17835 /* Highest priority is that src1 should match dst. */
17836 if (rtx_equal_p (dst, src1))
17838 if (rtx_equal_p (dst, src2))
17841 /* Next highest priority is that immediate constants come second. */
17842 if (immediate_operand (src2, mode))
17844 if (immediate_operand (src1, mode))
17847 /* Lowest priority is that memory references should come second. */
17857 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17858 destination to use for the operation. If different from the true
17859 destination in operands[0], a copy operation will be required. */
17862 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
17865 rtx dst = operands[0];
17866 rtx src1 = operands[1];
17867 rtx src2 = operands[2];
17869 /* Canonicalize operand order. */
17870 if (ix86_swap_binary_operands_p (code, mode, operands))
17872 /* It is invalid to swap operands of different modes. */
17873 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17875 std::swap (src1, src2);
17878 /* Both source operands cannot be in memory. */
17879 if (MEM_P (src1) && MEM_P (src2))
17881 /* Optimization: Only read from memory once. */
17882 if (rtx_equal_p (src1, src2))
17884 src2 = force_reg (mode, src2);
17887 else if (rtx_equal_p (dst, src1))
17888 src2 = force_reg (mode, src2);
17890 src1 = force_reg (mode, src1);
17893 /* If the destination is memory, and we do not have matching source
17894 operands, do things in registers. */
17895 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17896 dst = gen_reg_rtx (mode);
17898 /* Source 1 cannot be a constant. */
17899 if (CONSTANT_P (src1))
17900 src1 = force_reg (mode, src1);
17902 /* Source 1 cannot be a non-matching memory. */
17903 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17904 src1 = force_reg (mode, src1);
17906 /* Improve address combine. */
/* NOTE(review): the condition here is partially elided in this listing;
   presumably it also checks CODE == PLUS -- confirm against full source.  */
17908 && GET_MODE_CLASS (mode) == MODE_INT
17910 src2 = force_reg (mode, src2);
/* Write the (possibly replaced) sources back for the caller.  */
17912 operands[1] = src1;
17913 operands[2] = src2;
17917 /* Similarly, but assume that the destination has already been
17918 set up properly. */
/* Thin wrapper over ix86_fixup_binary_operands that additionally
   asserts no destination copy is required.  */
17921 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17922 machine_mode mode, rtx operands[])
17924 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17925 gcc_assert (dst == operands[0]);
17928 /* Attempt to expand a binary operator. Make the expansion closer to the
17929 actual machine, then just general_operand, which will allow 3 separate
17930 memory references (one output, two input) in a single insn. */
17933 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
17936 rtx src1, src2, dst, op, clob;
17938 dst = ix86_fixup_binary_operands (code, mode, operands);
17939 src1 = operands[1];
17940 src2 = operands[2];
17942 /* Emit the instruction. */
17944 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17945 if (reload_in_progress)
17947 /* Reload doesn't know about the flags register, and doesn't know that
17948 it doesn't want to clobber it. We can only do this with PLUS. */
17949 gcc_assert (code == PLUS);
17952 else if (reload_completed
17954 && !rtx_equal_p (dst, src1))
17956 /* This is going to be an LEA; avoid splitting it later. */
/* Normal case: pair the SET with a clobber of the flags register,
   since the ALU forms of these insns set EFLAGS.  */
17961 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17962 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17965 /* Fix up the destination if needed. */
17966 if (dst != operands[0])
17967 emit_move_insn (operands[0], dst);
17970 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17971 the given OPERANDS. */
17974 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
17977 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17978 if (GET_CODE (operands[1]) == SUBREG)
17983 else if (GET_CODE (operands[2]) == SUBREG)
17988 /* Optimize (__m128i) d | (__m128i) e and similar code
17989 when d and e are float vectors into float vector logical
17990 insn. In C/C++ without using intrinsics there is no other way
17991 to express vector logical operation on float vectors than
17992 to cast them temporarily to integer vectors. */
17994 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17995 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17996 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17997 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17998 && SUBREG_BYTE (op1) == 0
17999 && (GET_CODE (op2) == CONST_VECTOR
18000 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
18001 && SUBREG_BYTE (op2) == 0))
18002 && can_create_pseudo_p ())
18005 switch (GET_MODE (SUBREG_REG (op1)))
/* Perform the operation in the float-vector mode of the SUBREG
   operand, then move the result back in the requested MODE.  */
18013 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
18014 if (GET_CODE (op2) == CONST_VECTOR)
18016 op2 = gen_lowpart (GET_MODE (dst), op2);
18017 op2 = force_reg (GET_MODE (dst), op2);
18022 op2 = SUBREG_REG (operands[2]);
18023 if (!nonimmediate_operand (op2, GET_MODE (dst)))
18024 op2 = force_reg (GET_MODE (dst), op2);
18026 op1 = SUBREG_REG (op1);
18027 if (!nonimmediate_operand (op1, GET_MODE (dst)))
18028 op1 = force_reg (GET_MODE (dst), op1);
18029 emit_insn (gen_rtx_SET (VOIDmode, dst,
18030 gen_rtx_fmt_ee (code, GET_MODE (dst),
18032 emit_move_insn (operands[0], gen_lowpart (mode, dst));
/* Fallback: legitimize both sources and emit the operation in MODE.  */
18038 if (!nonimmediate_operand (operands[1], mode))
18039 operands[1] = force_reg (mode, operands[1]);
18040 if (!nonimmediate_operand (operands[2], mode))
18041 operands[2] = force_reg (mode, operands[2]);
18042 ix86_fixup_binary_operands_no_copy (code, mode, operands);
18043 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18044 gen_rtx_fmt_ee (code, mode, operands[1],
18048 /* Return TRUE or FALSE depending on whether the binary operator meets the
18049 appropriate constraints. */
18052 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
18055 rtx dst = operands[0];
18056 rtx src1 = operands[1];
18057 rtx src2 = operands[2];
18059 /* Both source operands cannot be in memory. */
18060 if (MEM_P (src1) && MEM_P (src2))
18063 /* Canonicalize operand order for commutative operators. */
18064 if (ix86_swap_binary_operands_p (code, mode, operands))
18065 std::swap (src1, src2);
18067 /* If the destination is memory, we must have a matching source operand. */
18068 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18071 /* Source 1 cannot be a constant. */
18072 if (CONSTANT_P (src1))
18075 /* Source 1 cannot be a non-matching memory. */
18076 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18077 /* Support "andhi/andsi/anddi" as a zero-extending move. */
18078 return (code == AND
18081 || (TARGET_64BIT && mode == DImode))
18082 && satisfies_constraint_L (src2));
18087 /* Attempt to expand a unary operator. Make the expansion closer to the
18088 actual machine, then just general_operand, which will allow 2 separate
18089 memory references (one output, one input) in a single insn. */
18092 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
18095 bool matching_memory = false;
18096 rtx src, dst, op, clob;
18101 /* If the destination is memory, and we do not have matching source
18102 operands, do things in registers. */
18105 if (rtx_equal_p (dst, src))
18106 matching_memory = true;
18108 dst = gen_reg_rtx (mode);
18111 /* When source operand is memory, destination must match. */
18112 if (MEM_P (src) && !matching_memory)
18113 src = force_reg (mode, src);
18115 /* Emit the instruction. */
18117 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
/* NOT does not set the flags on x86, so it needs no flags clobber;
   during reload we must also avoid adding one.  */
18118 if (reload_in_progress || code == NOT)
18120 /* Reload doesn't know about the flags register, and doesn't know that
18121 it doesn't want to clobber it. */
18122 gcc_assert (code == NOT);
18127 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18128 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18131 /* Fix up the destination if needed. */
18132 if (dst != operands[0])
18133 emit_move_insn (operands[0], dst);
18136 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
18137 divisor are within the range [0-255]. */
18140 ix86_split_idivmod (machine_mode mode, rtx operands[],
18143 rtx_code_label *end_label, *qimode_label;
18144 rtx insn, div, mod;
18145 rtx scratch, tmp0, tmp1, tmp2;
18146 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18147 rtx (*gen_zero_extend) (rtx, rtx);
18148 rtx (*gen_test_ccno_1) (rtx, rtx);
/* Select SImode or DImode insn generators depending on MODE.  */
18153 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18154 gen_test_ccno_1 = gen_testsi_ccno_1;
18155 gen_zero_extend = gen_zero_extendqisi2;
18158 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18159 gen_test_ccno_1 = gen_testdi_ccno_1;
18160 gen_zero_extend = gen_zero_extendqidi2;
18163 gcc_unreachable ();
18166 end_label = gen_label_rtx ();
18167 qimode_label = gen_label_rtx ();
18169 scratch = gen_reg_rtx (mode);
18171 /* Use 8bit unsigned divmod if dividend and divisor are within
18172 the range [0-255]. */
18173 emit_move_insn (scratch, operands[2]);
18174 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18175 scratch, 1, OPTAB_DIRECT)
/* Testing (dividend | divisor) against ~0xff checks both operands
   for fitting in 8 bits with a single TEST.  */
18176 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
18177 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18178 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18179 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18180 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18182 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
18183 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18184 JUMP_LABEL (insn) = qimode_label;
18186 /* Generate original signed/unsigned divmod. */
18187 div = gen_divmod4_1 (operands[0], operands[1],
18188 operands[2], operands[3]);
18191 /* Branch to the end. */
18192 emit_jump_insn (gen_jump (end_label));
18195 /* Generate 8bit unsigned divide. */
18196 emit_label (qimode_label);
18197 /* Don't use operands[0] for result of 8bit divide since not all
18198 registers support QImode ZERO_EXTRACT. */
18199 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18200 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18201 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18202 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
18206 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18207 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18211 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18212 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18215 /* Extract remainder from AH. */
18216 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18217 if (REG_P (operands[1]))
18218 insn = emit_move_insn (operands[1], tmp1);
18221 /* Need a new scratch register since the old one has result
18223 scratch = gen_reg_rtx (mode);
18224 emit_move_insn (scratch, tmp1);
18225 insn = emit_move_insn (operands[1], scratch);
18227 set_unique_reg_note (insn, REG_EQUAL, mod);
18229 /* Zero extend quotient from AL. */
18230 tmp1 = gen_lowpart (QImode, tmp0);
18231 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18232 set_unique_reg_note (insn, REG_EQUAL, div);
18234 emit_label (end_label);
/* Maximum AGU stall (in cycles) we are willing to tolerate, and how
   far (in insns) the LEA-splitting heuristics scan around an insn.  */
18237 #define LEA_MAX_STALL (3)
18238 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
18240 /* Increase given DISTANCE in half-cycles according to
18241 dependencies between PREV and NEXT instructions.
18242 Add 1 half-cycle if there is no dependency and
18243 go to next cycle if there is some dependency. */
18245 static unsigned int
18246 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
/* With a missing insn on either side, conservatively round up to the
   start of the next full cycle.  */
18250 if (!prev || !next)
18251 return distance + (distance & 1) + 2;
18253 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18254 return distance + 1;
/* A def in PREV feeding a use in NEXT is a true dependency: round up
   to the next cycle instead of adding just half a cycle.  */
18256 FOR_EACH_INSN_USE (use, next)
18257 FOR_EACH_INSN_DEF (def, prev)
18258 if (!DF_REF_IS_ARTIFICIAL (def)
18259 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18260 return distance + (distance & 1) + 2;
18262 return distance + 1;
18265 /* Function checks if instruction INSN defines register number
18266 REGNO1 or REGNO2. */
18269 insn_defines_reg (unsigned int regno1, unsigned int regno2,
/* Walk the dataflow defs of INSN; artificial defs do not count.  */
18274 FOR_EACH_INSN_DEF (def, insn)
18275 if (DF_REF_REG_DEF_P (def)
18276 && !DF_REF_IS_ARTIFICIAL (def)
18277 && (regno1 == DF_REF_REGNO (def)
18278 || regno2 == DF_REF_REGNO (def)))
18284 /* Function checks if instruction INSN uses register number
18285 REGNO as a part of address expression. */
18288 insn_uses_reg_mem (unsigned int regno, rtx insn)
/* DF_REF_REG_MEM_P is true only for uses inside a MEM address.  */
18292 FOR_EACH_INSN_USE (use, insn)
18293 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
18299 /* Search backward for non-agu definition of register number REGNO1
18300 or register number REGNO2 in basic block starting from instruction
18301 START up to head of basic block or instruction INSN.
18303 Function puts true value into *FOUND var if definition was found
18304 and false otherwise.
18306 Distance in half-cycles between START and found instruction or head
18307 of BB is added to DISTANCE and returned. */
18310 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18311 rtx_insn *insn, int distance,
18312 rtx_insn *start, bool *found)
18314 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18315 rtx_insn *prev = start;
18316 rtx_insn *next = NULL;
/* Scan backwards, bounded by LEA_SEARCH_THRESHOLD half-cycles.  */
18322 && distance < LEA_SEARCH_THRESHOLD
18324 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18326 distance = increase_distance (prev, next, distance);
18327 if (insn_defines_reg (regno1, regno2, prev))
/* Only a non-LEA (non-AGU) definition counts as found.  */
18329 if (recog_memoized (prev) < 0
18330 || get_attr_type (prev) != TYPE_LEA)
18339 if (prev == BB_HEAD (bb))
18342 prev = PREV_INSN (prev);
18348 /* Search backward for non-agu definition of register number REGNO1
18349 or register number REGNO2 in INSN's basic block until
18350 1. Pass LEA_SEARCH_THRESHOLD instructions, or
18351 2. Reach neighbour BBs boundary, or
18352 3. Reach agu definition.
18353 Returns the distance between the non-agu definition point and INSN.
18354 If no definition point, returns -1. */
18357 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18360 basic_block bb = BLOCK_FOR_INSN (insn);
18362 bool found = false;
18364 if (insn != BB_HEAD (bb))
18365 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18366 distance, PREV_INSN (insn),
/* Not found locally: continue the search into predecessor blocks.  */
18369 if (!found && distance < LEA_SEARCH_THRESHOLD)
18373 bool simple_loop = false;
18375 FOR_EACH_EDGE (e, ei, bb->preds)
18378 simple_loop = true;
/* A self-loop lets us rescan the same block from its end.  */
18383 distance = distance_non_agu_define_in_bb (regno1, regno2,
18385 BB_END (bb), &found);
/* Otherwise take the shortest distance over all predecessors.  */
18388 int shortest_dist = -1;
18389 bool found_in_bb = false;
18391 FOR_EACH_EDGE (e, ei, bb->preds)
18394 = distance_non_agu_define_in_bb (regno1, regno2,
18400 if (shortest_dist < 0)
18401 shortest_dist = bb_dist;
18402 else if (bb_dist > 0)
18403 shortest_dist = MIN (bb_dist, shortest_dist);
18409 distance = shortest_dist;
18413 /* get_attr_type may modify recog data. We want to make sure
18414 that recog data is valid for instruction INSN, on which
18415 distance_non_agu_define is called. INSN is unchanged here. */
18416 extract_insn_cached (insn);
/* Internal bookkeeping is in half-cycles; report whole cycles.  */
18421 return distance >> 1;
18424 /* Return the distance in half-cycles between INSN and the next
18425 insn that uses register number REGNO in memory address added
18426 to DISTANCE. Return -1 if REGNO0 is set.
18428 Put true value into *FOUND if register usage was found and
18430 Put true value into *REDEFINED if register redefinition was
18431 found and false otherwise. */
18434 distance_agu_use_in_bb (unsigned int regno,
18435 rtx_insn *insn, int distance, rtx_insn *start,
18436 bool *found, bool *redefined)
18438 basic_block bb = NULL;
18439 rtx_insn *next = start;
18440 rtx_insn *prev = NULL;
18443 *redefined = false;
18445 if (start != NULL_RTX)
18447 bb = BLOCK_FOR_INSN (start);
18448 if (start != BB_HEAD (bb))
18449 /* If insn and start belong to the same bb, set prev to insn,
18450 so the call to increase_distance will increase the distance
18451 between insns by 1. */
/* Scan forwards, bounded by LEA_SEARCH_THRESHOLD half-cycles.  */
18457 && distance < LEA_SEARCH_THRESHOLD)
18459 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18461 distance = increase_distance(prev, next, distance);
18462 if (insn_uses_reg_mem (regno, next))
18464 /* Return DISTANCE if OP0 is used in memory
18465 address in NEXT. */
18470 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18472 /* Return -1 if OP0 is set in NEXT. */
18480 if (next == BB_END (bb))
18483 next = NEXT_INSN (next);
18489 /* Return the distance between INSN and the next insn that uses
18490 register number REGNO0 in memory address. Return -1 if no such
18491 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18494 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18496 basic_block bb = BLOCK_FOR_INSN (insn);
18498 bool found = false;
18499 bool redefined = false;
18501 if (insn != BB_END (bb))
18502 distance = distance_agu_use_in_bb (regno0, insn, distance,
18504 &found, &redefined);
/* Not resolved locally: continue the search into successor blocks.  */
18506 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18510 bool simple_loop = false;
18512 FOR_EACH_EDGE (e, ei, bb->succs)
18515 simple_loop = true;
/* A self-loop lets us rescan the same block from its head.  */
18520 distance = distance_agu_use_in_bb (regno0, insn,
18521 distance, BB_HEAD (bb),
18522 &found, &redefined);
/* Otherwise take the shortest distance over all successors.  */
18525 int shortest_dist = -1;
18526 bool found_in_bb = false;
18527 bool redefined_in_bb = false;
18529 FOR_EACH_EDGE (e, ei, bb->succs)
18532 = distance_agu_use_in_bb (regno0, insn,
18533 distance, BB_HEAD (e->dest),
18534 &found_in_bb, &redefined_in_bb);
18537 if (shortest_dist < 0)
18538 shortest_dist = bb_dist;
18539 else if (bb_dist > 0)
18540 shortest_dist = MIN (bb_dist, shortest_dist);
18546 distance = shortest_dist;
18550 if (!found || redefined)
/* Internal bookkeeping is in half-cycles; report whole cycles.  */
18553 return distance >> 1;
18556 /* Define this macro to tune LEA priority vs ADD, it take effect when
18557 there is a dilemma of choosing LEA or ADD
18558 Negative value: ADD is more preferred than LEA
18560 Positive value: LEA is more preferred than ADD*/
18561 #define IX86_LEA_PRIORITY 0
18563 /* Return true if usage of lea INSN has performance advantage
18564 over a sequence of instructions. Instructions sequence has
18565 SPLIT_COST cycles higher latency than lea latency. */
18568 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18569 unsigned int regno2, int split_cost, bool has_scale)
18571 int dist_define, dist_use;
18573 /* For Silvermont if using a 2-source or 3-source LEA for
18574 non-destructive destination purposes, or due to wanting
18575 ability to use SCALE, the use of LEA is justified. */
18576 if (TARGET_SILVERMONT || TARGET_INTEL)
18580 if (split_cost < 1)
18582 if (regno0 == regno1 || regno0 == regno2)
/* General case: weigh the backward non-AGU def distance against the
   forward AGU use distance.  */
18587 dist_define = distance_non_agu_define (regno1, regno2, insn);
18588 dist_use = distance_agu_use (regno0, insn);
18590 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18592 /* If there is no non AGU operand definition, no AGU
18593 operand usage and split cost is 0 then both lea
18594 and non lea variants have same priority. Currently
18595 we prefer lea for 64 bit code and non lea on 32 bit
18597 if (dist_use < 0 && split_cost == 0)
18598 return TARGET_64BIT || IX86_LEA_PRIORITY;
18603 /* With longer definitions distance lea is more preferable.
18604 Here we change it to take into account splitting cost and
18606 dist_define += split_cost + IX86_LEA_PRIORITY;
18608 /* If there is no use in memory address then we just check
18609 that split cost exceeds AGU stall. */
18611 return dist_define > LEA_MAX_STALL;
18613 /* If this insn has both backward non-agu dependence and forward
18614 agu dependence, the one with short distance takes effect. */
18615 return dist_define >= dist_use;
18618 /* Return true if it is legal to clobber flags by INSN and
18619 false otherwise. */
18622 ix86_ok_to_clobber_flags (rtx_insn *insn)
18624 basic_block bb = BLOCK_FOR_INSN (insn);
/* Scan forward to the end of the block: any use of FLAGS_REG before
   a redefinition means we may not clobber the flags here.  */
18630 if (NONDEBUG_INSN_P (insn))
18632 FOR_EACH_INSN_USE (use, insn)
18633 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
/* A later def of the flags makes everything beyond it irrelevant.  */
18636 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18640 if (insn == BB_END (bb))
18643 insn = NEXT_INSN (insn);
/* Reached block end: safe only if the flags are dead on exit.  */
18646 live = df_get_live_out(bb);
18647 return !REGNO_REG_SET_P (live, FLAGS_REG);
18650 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18651 move and add to avoid AGU stalls. */
18654 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18656 unsigned int regno0, regno1, regno2;
18658 /* Check if we need to optimize. */
18659 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18662 /* Check it is correct to split here. */
18663 if (!ix86_ok_to_clobber_flags(insn))
18666 regno0 = true_regnum (operands[0]);
18667 regno1 = true_regnum (operands[1]);
18668 regno2 = true_regnum (operands[2]);
18670 /* We need to split only adds with non destructive
18671 destination operand. */
18672 if (regno0 == regno1 || regno0 == regno2)
/* Split cost of 1 accounts for the extra mov the split needs.  */
18675 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18678 /* Return true if we should emit lea instruction instead of mov
/* (reg-to-reg moves only; decided by the AGU-stall cost model).  */
18682 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18684 unsigned int regno0, regno1;
18686 /* Check if we need to optimize. */
18687 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18690 /* Use lea for reg to reg moves only. */
18691 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18694 regno0 = true_regnum (operands[0]);
18695 regno1 = true_regnum (operands[1]);
18697 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18700 /* Return true if we need to split lea into a sequence of
18701 instructions to avoid AGU stalls. */
18704 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18706 unsigned int regno0, regno1, regno2;
18708 struct ix86_address parts;
18711 /* Check we need to optimize. */
18712 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18715 /* The "at least two components" test below might not catch simple
18716 move or zero extension insns if parts.base is non-NULL and parts.disp
18717 is const0_rtx as the only components in the address, e.g. if the
18718 register is %rbp or %r13. As this test is much cheaper and moves or
18719 zero extensions are the common case, do this check first. */
18720 if (REG_P (operands[1])
18721 || (SImode_address_operand (operands[1], VOIDmode)
18722 && REG_P (XEXP (operands[1], 0))))
18725 /* Check if it is OK to split here. */
18726 if (!ix86_ok_to_clobber_flags (insn))
18729 ok = ix86_decompose_address (operands[1], &parts);
18732 /* There should be at least two components in the address. */
18733 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18734 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18737 /* We should not split into add if non legitimate pic
18738 operand is used as displacement. */
18739 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18742 regno0 = true_regnum (operands[0]) ;
18743 regno1 = INVALID_REGNUM;
18744 regno2 = INVALID_REGNUM;
18747 regno1 = true_regnum (parts.base);
18749 regno2 = true_regnum (parts.index);
18753 /* Compute how many cycles we will add to execution time
18754 if split lea into a sequence of instructions. */
18755 if (parts.base || parts.index)
18757 /* Have to use mov instruction if non destructive
18758 destination form is used. */
18759 if (regno1 != regno0 && regno2 != regno0)
18762 /* Have to add index to base if both exist. */
18763 if (parts.base && parts.index)
18766 /* Have to use shift and adds if scale is 2 or greater. */
18767 if (parts.scale > 1)
18769 if (regno0 != regno1)
18771 else if (regno2 == regno0)
18774 split_cost += parts.scale;
18777 /* Have to use add instruction with immediate if
18778 disp is non zero. */
18779 if (parts.disp && parts.disp != const0_rtx)
18782 /* Subtract the price of lea. */
18786 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18790 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18791 matches destination. RTX includes clobber of FLAGS_REG. */
18794 ix86_emit_binop (enum rtx_code code, machine_mode mode,
/* Emit dst = dst CODE src as a PARALLEL of the SET and a flags clobber,
   matching the two-operand ALU insn patterns.  */
18799 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18800 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18802 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18805 /* Return true if regno1 def is nearest to the insn. */
18808 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18810 rtx_insn *prev = insn;
18811 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
/* Walk backwards within the basic block, skipping debug/non-insns,
   and report which of the two registers is defined closer to INSN.  */
18815 while (prev && prev != start)
18817 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18819 prev = PREV_INSN (prev);
18822 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18824 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18826 prev = PREV_INSN (prev);
18829 /* None of the regs is defined in the bb. */
18833 /* Split lea instructions into a sequence of instructions
18834 which are executed on ALU to avoid AGU stalls.
18835 It is assumed that it is allowed to clobber flags register
18836 at lea position. */
18839 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
18841 unsigned int regno0, regno1, regno2;
18842 struct ix86_address parts;
18846 ok = ix86_decompose_address (operands[1], &parts);
18849 target = gen_lowpart (mode, operands[0]);
18851 regno0 = true_regnum (target);
18852 regno1 = INVALID_REGNUM;
18853 regno2 = INVALID_REGNUM;
18857 parts.base = gen_lowpart (mode, parts.base);
18858 regno1 = true_regnum (parts.base);
18863 parts.index = gen_lowpart (mode, parts.index);
18864 regno2 = true_regnum (parts.index);
18868 parts.disp = gen_lowpart (mode, parts.disp);
18870 if (parts.scale > 1)
18872 /* Case r1 = r1 + ... */
18873 if (regno1 == regno0)
18875 /* If we have a case r1 = r1 + C * r2 then we
18876 should use multiplication which is very
18877 expensive. Assume cost model is wrong if we
18878 have such case here. */
18879 gcc_assert (regno2 != regno0);
/* Expand scale*index as repeated adds of the index register.  */
18881 for (adds = parts.scale; adds > 0; adds--)
18882 ix86_emit_binop (PLUS, mode, target, parts.index);
18886 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18887 if (regno0 != regno2)
18888 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18890 /* Use shift for scaling. */
18891 ix86_emit_binop (ASHIFT, mode, target,
18892 GEN_INT (exact_log2 (parts.scale)));
18895 ix86_emit_binop (PLUS, mode, target, parts.base);
18897 if (parts.disp && parts.disp != const0_rtx)
18898 ix86_emit_binop (PLUS, mode, target, parts.disp);
/* No base and no index: the address is just the displacement.  */
18901 else if (!parts.base && !parts.index)
18903 gcc_assert(parts.disp);
18904 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18910 if (regno0 != regno2)
18911 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18913 else if (!parts.index)
18915 if (regno0 != regno1)
18916 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18920 if (regno0 == regno1)
18922 else if (regno0 == regno2)
18928 /* Find better operand for SET instruction, depending
18929 on which definition is farther from the insn. */
18930 if (find_nearest_reg_def (insn, regno1, regno2))
18931 tmp = parts.index, tmp1 = parts.base;
18933 tmp = parts.base, tmp1 = parts.index;
18935 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18937 if (parts.disp && parts.disp != const0_rtx)
18938 ix86_emit_binop (PLUS, mode, target, parts.disp);
18940 ix86_emit_binop (PLUS, mode, target, tmp1);
18944 ix86_emit_binop (PLUS, mode, target, tmp);
18947 if (parts.disp && parts.disp != const0_rtx)
18948 ix86_emit_binop (PLUS, mode, target, parts.disp);
18952 /* Return true if it is ok to optimize an ADD operation to LEA
18953 operation to avoid flag register consumation. For most processors,
18954 ADD is faster than LEA. For the processors like BONNELL, if the
18955 destination register of LEA holds an actual address which will be
18956 used soon, LEA is better and otherwise ADD is better. */
18959 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
18961 unsigned int regno0 = true_regnum (operands[0]);
18962 unsigned int regno1 = true_regnum (operands[1]);
18963 unsigned int regno2 = true_regnum (operands[2]);
18965 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18966 if (regno0 != regno1 && regno0 != regno2)
18969 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18972 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18975 /* Return true if destination reg of SET_BODY is shift count of
18979 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18985 /* Retrieve destination of SET_BODY. */
18986 switch (GET_CODE (set_body))
18989 set_dest = SET_DEST (set_body);
18990 if (!set_dest || !REG_P (set_dest))
18994 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18995 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
19003 /* Retrieve shift count of USE_BODY. */
19004 switch (GET_CODE (use_body))
19007 shift_rtx = XEXP (use_body, 1);
19010 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
19011 if (ix86_dep_by_shift_count_body (set_body,
19012 XVECEXP (use_body, 0, i)))
19020 && (GET_CODE (shift_rtx) == ASHIFT
19021 || GET_CODE (shift_rtx) == LSHIFTRT
19022 || GET_CODE (shift_rtx) == ASHIFTRT
19023 || GET_CODE (shift_rtx) == ROTATE
19024 || GET_CODE (shift_rtx) == ROTATERT))
19026 rtx shift_count = XEXP (shift_rtx, 1);
19028 /* Return true if shift count is dest of SET_BODY. */
19029 if (REG_P (shift_count))
19031 /* Add check since it can be invoked before register
19032 allocation in pre-reload schedule. */
19033 if (reload_completed
19034 && true_regnum (set_dest) == true_regnum (shift_count))
19036 else if (REGNO(set_dest) == REGNO(shift_count))
19044 /* Return true if destination reg of SET_INSN is shift count of
19048 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
19050 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
19051 PATTERN (use_insn));
19054 /* Return TRUE or FALSE depending on whether the unary operator meets the
19055 appropriate constraints. */
19058 ix86_unary_operator_ok (enum rtx_code,
19062 /* If one of operands is memory, source and destination must match. */
19063 if ((MEM_P (operands[0])
19064 || MEM_P (operands[1]))
19065 && ! rtx_equal_p (operands[0], operands[1]))
19070 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
19071 are ok, keeping in mind the possible movddup alternative. */
19074 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
19076 if (MEM_P (operands[0]))
19077 return rtx_equal_p (operands[0], operands[1 + high]);
19078 if (MEM_P (operands[1]) && MEM_P (operands[2]))
19079 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
19083 /* Post-reload splitter for converting an SF or DFmode value in an
19084 SSE register into an unsigned SImode. */
19087 ix86_split_convert_uns_si_sse (rtx operands[])
19089 machine_mode vecmode;
19090 rtx value, large, zero_or_two31, input, two31, x;
19092 large = operands[1];
19093 zero_or_two31 = operands[2];
19094 input = operands[3];
19095 two31 = operands[4];
19096 vecmode = GET_MODE (large);
19097 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
19099 /* Load up the value into the low element. We must ensure that the other
19100 elements are valid floats -- zero is the easiest such value. */
19103 if (vecmode == V4SFmode)
19104 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
19106 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
19110 input = gen_rtx_REG (vecmode, REGNO (input));
19111 emit_move_insn (value, CONST0_RTX (vecmode));
19112 if (vecmode == V4SFmode)
19113 emit_insn (gen_sse_movss (value, value, input));
19115 emit_insn (gen_sse2_movsd (value, value, input));
19118 emit_move_insn (large, two31);
19119 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
19121 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19122 emit_insn (gen_rtx_SET (VOIDmode, large, x));
19124 x = gen_rtx_AND (vecmode, zero_or_two31, large);
19125 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
19127 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19128 emit_insn (gen_rtx_SET (VOIDmode, value, x));
19130 large = gen_rtx_REG (V4SImode, REGNO (large));
19131 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
19133 x = gen_rtx_REG (V4SImode, REGNO (value));
19134 if (vecmode == V4SFmode)
19135 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19137 emit_insn (gen_sse2_cvttpd2dq (x, value));
19140 emit_insn (gen_xorv4si3 (value, value, large));
19143 /* Convert an unsigned DImode value into a DFmode, using only SSE.
19144 Expects the 64-bit DImode to be supplied in a pair of integral
19145 registers. Requires SSE2; will use SSE3 if available. For x86_32,
19146 -mfpmath=sse, !optimize_size only. */
19149 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19151 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19152 rtx int_xmm, fp_xmm;
19153 rtx biases, exponents;
19156 int_xmm = gen_reg_rtx (V4SImode);
19157 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19158 emit_insn (gen_movdi_to_sse (int_xmm, input));
19159 else if (TARGET_SSE_SPLIT_REGS)
19161 emit_clobber (int_xmm);
19162 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19166 x = gen_reg_rtx (V2DImode);
19167 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19168 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
19171 x = gen_rtx_CONST_VECTOR (V4SImode,
19172 gen_rtvec (4, GEN_INT (0x43300000UL),
19173 GEN_INT (0x45300000UL),
19174 const0_rtx, const0_rtx));
19175 exponents = validize_mem (force_const_mem (V4SImode, x));
19177 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19178 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19180 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19181 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19182 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19183 (0x1.0p84 + double(fp_value_hi_xmm)).
19184 Note these exponents differ by 32. */
19186 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19188 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19189 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
19190 real_ldexp (&bias_lo_rvt, &dconst1, 52);
19191 real_ldexp (&bias_hi_rvt, &dconst1, 84);
19192 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19193 x = const_double_from_real_value (bias_hi_rvt, DFmode);
19194 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19195 biases = validize_mem (force_const_mem (V2DFmode, biases));
19196 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19198 /* Add the upper and lower DFmode values together. */
19200 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19203 x = copy_to_mode_reg (V2DFmode, fp_xmm);
19204 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19205 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19208 ix86_expand_vector_extract (false, target, fp_xmm, 0);
19211 /* Not used, but eases macroization of patterns. */
19213 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19215 gcc_unreachable ();
19218 /* Convert an unsigned SImode value into a DFmode. Only currently used
19219 for SSE, but applicable anywhere. */
19222 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19224 REAL_VALUE_TYPE TWO31r;
19227 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19228 NULL, 1, OPTAB_DIRECT);
19230 fp = gen_reg_rtx (DFmode);
19231 emit_insn (gen_floatsidf2 (fp, x));
19233 real_ldexp (&TWO31r, &dconst1, 31);
19234 x = const_double_from_real_value (TWO31r, DFmode);
19236 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19238 emit_move_insn (target, x);
19241 /* Convert a signed DImode value into a DFmode. Only used for SSE in
19242 32-bit mode; otherwise we have a direct convert instruction. */
19245 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19247 REAL_VALUE_TYPE TWO32r;
19248 rtx fp_lo, fp_hi, x;
19250 fp_lo = gen_reg_rtx (DFmode);
19251 fp_hi = gen_reg_rtx (DFmode);
19253 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19255 real_ldexp (&TWO32r, &dconst1, 32);
19256 x = const_double_from_real_value (TWO32r, DFmode);
19257 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
19259 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19261 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19264 emit_move_insn (target, x);
19267 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19268 For x86_32, -mfpmath=sse, !optimize_size only. */
19270 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19272 REAL_VALUE_TYPE ONE16r;
19273 rtx fp_hi, fp_lo, int_hi, int_lo, x;
19275 real_ldexp (&ONE16r, &dconst1, 16);
19276 x = const_double_from_real_value (ONE16r, SFmode);
19277 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
19278 NULL, 0, OPTAB_DIRECT);
19279 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
19280 NULL, 0, OPTAB_DIRECT);
19281 fp_hi = gen_reg_rtx (SFmode);
19282 fp_lo = gen_reg_rtx (SFmode);
19283 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19284 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19285 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19287 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19289 if (!rtx_equal_p (target, fp_hi))
19290 emit_move_insn (target, fp_hi);
19293 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19294 a vector of unsigned ints VAL to vector of floats TARGET. */
19297 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19300 REAL_VALUE_TYPE TWO16r;
19301 machine_mode intmode = GET_MODE (val);
19302 machine_mode fltmode = GET_MODE (target);
19303 rtx (*cvt) (rtx, rtx);
19305 if (intmode == V4SImode)
19306 cvt = gen_floatv4siv4sf2;
19308 cvt = gen_floatv8siv8sf2;
19309 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19310 tmp[0] = force_reg (intmode, tmp[0]);
19311 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19313 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19314 NULL_RTX, 1, OPTAB_DIRECT);
19315 tmp[3] = gen_reg_rtx (fltmode);
19316 emit_insn (cvt (tmp[3], tmp[1]));
19317 tmp[4] = gen_reg_rtx (fltmode);
19318 emit_insn (cvt (tmp[4], tmp[2]));
19319 real_ldexp (&TWO16r, &dconst1, 16);
19320 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19321 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19322 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19324 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19326 if (tmp[7] != target)
19327 emit_move_insn (target, tmp[7]);
19330 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19331 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19332 This is done by doing just signed conversion if < 0x1p31, and otherwise by
19333 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
19336 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19338 REAL_VALUE_TYPE TWO31r;
19339 rtx two31r, tmp[4];
19340 machine_mode mode = GET_MODE (val);
19341 machine_mode scalarmode = GET_MODE_INNER (mode);
19342 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19343 rtx (*cmp) (rtx, rtx, rtx, rtx);
19346 for (i = 0; i < 3; i++)
19347 tmp[i] = gen_reg_rtx (mode);
19348 real_ldexp (&TWO31r, &dconst1, 31);
19349 two31r = const_double_from_real_value (TWO31r, scalarmode);
19350 two31r = ix86_build_const_vector (mode, 1, two31r);
19351 two31r = force_reg (mode, two31r);
19354 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19355 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19356 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19357 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19358 default: gcc_unreachable ();
19360 tmp[3] = gen_rtx_LE (mode, two31r, val);
19361 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19362 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19364 if (intmode == V4SImode || TARGET_AVX2)
19365 *xorp = expand_simple_binop (intmode, ASHIFT,
19366 gen_lowpart (intmode, tmp[0]),
19367 GEN_INT (31), NULL_RTX, 0,
19371 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
19372 two31 = ix86_build_const_vector (intmode, 1, two31);
19373 *xorp = expand_simple_binop (intmode, AND,
19374 gen_lowpart (intmode, tmp[0]),
19375 two31, NULL_RTX, 0,
19378 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19382 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19383 then replicate the value for all elements of the vector
19387 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19391 machine_mode scalar_mode;
19414 n_elt = GET_MODE_NUNITS (mode);
19415 v = rtvec_alloc (n_elt);
19416 scalar_mode = GET_MODE_INNER (mode);
19418 RTVEC_ELT (v, 0) = value;
19420 for (i = 1; i < n_elt; ++i)
19421 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19423 return gen_rtx_CONST_VECTOR (mode, v);
19426 gcc_unreachable ();
19430 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19431 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19432 for an SSE register. If VECT is true, then replicate the mask for
19433 all elements of the vector register. If INVERT is true, then create
19434 a mask excluding the sign bit. */
19437 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19439 machine_mode vec_mode, imode;
19440 HOST_WIDE_INT hi, lo;
19445 /* Find the sign bit, sign extended to 2*HWI. */
19455 mode = GET_MODE_INNER (mode);
19457 lo = 0x80000000, hi = lo < 0;
19467 mode = GET_MODE_INNER (mode);
19469 if (HOST_BITS_PER_WIDE_INT >= 64)
19470 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19472 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19477 vec_mode = VOIDmode;
19478 if (HOST_BITS_PER_WIDE_INT >= 64)
19481 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19488 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19492 lo = ~lo, hi = ~hi;
19498 mask = immed_double_const (lo, hi, imode);
19500 vec = gen_rtvec (2, v, mask);
19501 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19502 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19509 gcc_unreachable ();
19513 lo = ~lo, hi = ~hi;
19515 /* Force this value into the low part of a fp vector constant. */
19516 mask = immed_double_const (lo, hi, imode);
19517 mask = gen_lowpart (mode, mask);
19519 if (vec_mode == VOIDmode)
19520 return force_reg (mode, mask);
19522 v = ix86_build_const_vector (vec_mode, vect, mask);
19523 return force_reg (vec_mode, v);
19526 /* Generate code for floating point ABS or NEG. */
19529 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19532 rtx mask, set, dst, src;
19533 bool use_sse = false;
19534 bool vector_mode = VECTOR_MODE_P (mode);
19535 machine_mode vmode = mode;
19539 else if (mode == TFmode)
19541 else if (TARGET_SSE_MATH)
19543 use_sse = SSE_FLOAT_MODE_P (mode);
19544 if (mode == SFmode)
19546 else if (mode == DFmode)
19550 /* NEG and ABS performed with SSE use bitwise mask operations.
19551 Create the appropriate mask now. */
19553 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19560 set = gen_rtx_fmt_e (code, mode, src);
19561 set = gen_rtx_SET (VOIDmode, dst, set);
19568 use = gen_rtx_USE (VOIDmode, mask);
19570 par = gen_rtvec (2, set, use);
19573 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19574 par = gen_rtvec (3, set, use, clob);
19576 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19582 /* Expand a copysign operation. Special case operand 0 being a constant. */
19585 ix86_expand_copysign (rtx operands[])
19587 machine_mode mode, vmode;
19588 rtx dest, op0, op1, mask, nmask;
19590 dest = operands[0];
19594 mode = GET_MODE (dest);
19596 if (mode == SFmode)
19598 else if (mode == DFmode)
19603 if (GET_CODE (op0) == CONST_DOUBLE)
19605 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19607 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19608 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19610 if (mode == SFmode || mode == DFmode)
19612 if (op0 == CONST0_RTX (mode))
19613 op0 = CONST0_RTX (vmode);
19616 rtx v = ix86_build_const_vector (vmode, false, op0);
19618 op0 = force_reg (vmode, v);
19621 else if (op0 != CONST0_RTX (mode))
19622 op0 = force_reg (mode, op0);
19624 mask = ix86_build_signbit_mask (vmode, 0, 0);
19626 if (mode == SFmode)
19627 copysign_insn = gen_copysignsf3_const;
19628 else if (mode == DFmode)
19629 copysign_insn = gen_copysigndf3_const;
19631 copysign_insn = gen_copysigntf3_const;
19633 emit_insn (copysign_insn (dest, op0, op1, mask));
19637 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19639 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19640 mask = ix86_build_signbit_mask (vmode, 0, 0);
19642 if (mode == SFmode)
19643 copysign_insn = gen_copysignsf3_var;
19644 else if (mode == DFmode)
19645 copysign_insn = gen_copysigndf3_var;
19647 copysign_insn = gen_copysigntf3_var;
19649 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19653 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19654 be a constant, and so has already been expanded into a vector constant. */
19657 ix86_split_copysign_const (rtx operands[])
19659 machine_mode mode, vmode;
19660 rtx dest, op0, mask, x;
19662 dest = operands[0];
19664 mask = operands[3];
19666 mode = GET_MODE (dest);
19667 vmode = GET_MODE (mask);
19669 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19670 x = gen_rtx_AND (vmode, dest, mask);
19671 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19673 if (op0 != CONST0_RTX (vmode))
19675 x = gen_rtx_IOR (vmode, dest, op0);
19676 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19680 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19681 so we have to do two masks. */
19684 ix86_split_copysign_var (rtx operands[])
19686 machine_mode mode, vmode;
19687 rtx dest, scratch, op0, op1, mask, nmask, x;
19689 dest = operands[0];
19690 scratch = operands[1];
19693 nmask = operands[4];
19694 mask = operands[5];
19696 mode = GET_MODE (dest);
19697 vmode = GET_MODE (mask);
19699 if (rtx_equal_p (op0, op1))
19701 /* Shouldn't happen often (it's useless, obviously), but when it does
19702 we'd generate incorrect code if we continue below. */
19703 emit_move_insn (dest, op0);
19707 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19709 gcc_assert (REGNO (op1) == REGNO (scratch));
19711 x = gen_rtx_AND (vmode, scratch, mask);
19712 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19715 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19716 x = gen_rtx_NOT (vmode, dest);
19717 x = gen_rtx_AND (vmode, x, op0);
19718 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19722 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19724 x = gen_rtx_AND (vmode, scratch, mask);
19726 else /* alternative 2,4 */
19728 gcc_assert (REGNO (mask) == REGNO (scratch));
19729 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19730 x = gen_rtx_AND (vmode, scratch, op1);
19732 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19734 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19736 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19737 x = gen_rtx_AND (vmode, dest, nmask);
19739 else /* alternative 3,4 */
19741 gcc_assert (REGNO (nmask) == REGNO (dest));
19743 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19744 x = gen_rtx_AND (vmode, dest, op0);
19746 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19749 x = gen_rtx_IOR (vmode, dest, scratch);
19750 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19753 /* Return TRUE or FALSE depending on whether the first SET in INSN
19754 has source and destination with matching CC modes, and that the
19755 CC mode is at least as constrained as REQ_MODE. */
19758 ix86_match_ccmode (rtx insn, machine_mode req_mode)
19761 machine_mode set_mode;
19763 set = PATTERN (insn);
19764 if (GET_CODE (set) == PARALLEL)
19765 set = XVECEXP (set, 0, 0);
19766 gcc_assert (GET_CODE (set) == SET);
19767 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19769 set_mode = GET_MODE (SET_DEST (set));
19773 if (req_mode != CCNOmode
19774 && (req_mode != CCmode
19775 || XEXP (SET_SRC (set), 1) != const0_rtx))
19779 if (req_mode == CCGCmode)
19783 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19787 if (req_mode == CCZmode)
19797 if (set_mode != req_mode)
19802 gcc_unreachable ();
19805 return GET_MODE (SET_SRC (set)) == set_mode;
19808 /* Generate insn patterns to do an integer compare of OPERANDS. */
19811 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19813 machine_mode cmpmode;
19816 cmpmode = SELECT_CC_MODE (code, op0, op1);
19817 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19819 /* This is very simple, but making the interface the same as in the
19820 FP case makes the rest of the code easier. */
19821 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19822 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19824 /* Return the test that should be put into the flags user, i.e.
19825 the bcc, scc, or cmov instruction. */
19826 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19829 /* Figure out whether to use ordered or unordered fp comparisons.
19830 Return the appropriate mode to use. */
19833 ix86_fp_compare_mode (enum rtx_code)
19835 /* ??? In order to make all comparisons reversible, we do all comparisons
19836 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19837 all forms trapping and nontrapping comparisons, we can make inequality
19838 comparisons trapping again, since it results in better code when using
19839 FCOM based compares. */
19840 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19844 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19846 machine_mode mode = GET_MODE (op0);
19848 if (SCALAR_FLOAT_MODE_P (mode))
19850 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19851 return ix86_fp_compare_mode (code);
19856 /* Only zero flag is needed. */
19857 case EQ: /* ZF=0 */
19858 case NE: /* ZF!=0 */
19860 /* Codes needing carry flag. */
19861 case GEU: /* CF=0 */
19862 case LTU: /* CF=1 */
19863 /* Detect overflow checks. They need just the carry flag. */
19864 if (GET_CODE (op0) == PLUS
19865 && rtx_equal_p (op1, XEXP (op0, 0)))
19869 case GTU: /* CF=0 & ZF=0 */
19870 case LEU: /* CF=1 | ZF=1 */
19872 /* Codes possibly doable only with sign flag when
19873 comparing against zero. */
19874 case GE: /* SF=OF or SF=0 */
19875 case LT: /* SF<>OF or SF=1 */
19876 if (op1 == const0_rtx)
19879 /* For other cases Carry flag is not required. */
19881 /* Codes doable only with sign flag when comparing
19882 against zero, but we miss jump instruction for it
19883 so we need to use relational tests against overflow
19884 that thus needs to be zero. */
19885 case GT: /* ZF=0 & SF=OF */
19886 case LE: /* ZF=1 | SF<>OF */
19887 if (op1 == const0_rtx)
19891 /* strcmp pattern do (use flags) and combine may ask us for proper
19896 gcc_unreachable ();
19900 /* Return the fixed registers used for condition codes. */
19903 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19910 /* If two condition code modes are compatible, return a condition code
19911 mode which is compatible with both. Otherwise, return
19914 static machine_mode
19915 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
19920 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19923 if ((m1 == CCGCmode && m2 == CCGOCmode)
19924 || (m1 == CCGOCmode && m2 == CCGCmode))
19927 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19929 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19935 gcc_unreachable ();
19965 /* These are only compatible with themselves, which we already
19972 /* Return a comparison we can do and that it is equivalent to
19973 swap_condition (code) apart possibly from orderedness.
19974 But, never change orderedness if TARGET_IEEE_FP, returning
19975 UNKNOWN in that case if necessary. */
19977 static enum rtx_code
19978 ix86_fp_swap_condition (enum rtx_code code)
19982 case GT: /* GTU - CF=0 & ZF=0 */
19983 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19984 case GE: /* GEU - CF=0 */
19985 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19986 case UNLT: /* LTU - CF=1 */
19987 return TARGET_IEEE_FP ? UNKNOWN : GT;
19988 case UNLE: /* LEU - CF=1 | ZF=1 */
19989 return TARGET_IEEE_FP ? UNKNOWN : GE;
19991 return swap_condition (code);
19995 /* Return cost of comparison CODE using the best strategy for performance.
19996 All following functions do use number of instructions as a cost metrics.
19997 In future this should be tweaked to compute bytes for optimize_size and
19998 take into account performance of various instructions on various CPUs. */
20001 ix86_fp_comparison_cost (enum rtx_code code)
20005 /* The cost of code using bit-twiddling on %ah. */
20022 arith_cost = TARGET_IEEE_FP ? 5 : 4;
20026 arith_cost = TARGET_IEEE_FP ? 6 : 4;
20029 gcc_unreachable ();
20032 switch (ix86_fp_comparison_strategy (code))
20034 case IX86_FPCMP_COMI:
20035 return arith_cost > 4 ? 3 : 2;
20036 case IX86_FPCMP_SAHF:
20037 return arith_cost > 4 ? 4 : 3;
20043 /* Return strategy to use for floating-point. We assume that fcomi is always
20044 preferrable where available, since that is also true when looking at size
20045 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
20047 enum ix86_fpcmp_strategy
20048 ix86_fp_comparison_strategy (enum rtx_code)
20050 /* Do fcomi/sahf based test when profitable. */
20053 return IX86_FPCMP_COMI;
20055 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
20056 return IX86_FPCMP_SAHF;
20058 return IX86_FPCMP_ARITH;
20061 /* Swap, force into registers, or otherwise massage the two operands
20062 to a fp comparison. The operands are updated in place; the new
20063 comparison code is returned. */
20065 static enum rtx_code
20066 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
20068 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
20069 rtx op0 = *pop0, op1 = *pop1;
20070 machine_mode op_mode = GET_MODE (op0);
20071 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
20073 /* All of the unordered compare instructions only work on registers.
20074 The same is true of the fcomi compare instructions. The XFmode
20075 compare instructions require registers except when comparing
20076 against zero or when converting operand 1 from fixed point to
20080 && (fpcmp_mode == CCFPUmode
20081 || (op_mode == XFmode
20082 && ! (standard_80387_constant_p (op0) == 1
20083 || standard_80387_constant_p (op1) == 1)
20084 && GET_CODE (op1) != FLOAT)
20085 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
20087 op0 = force_reg (op_mode, op0);
20088 op1 = force_reg (op_mode, op1);
20092 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
20093 things around if they appear profitable, otherwise force op0
20094 into a register. */
20096 if (standard_80387_constant_p (op0) == 0
20098 && ! (standard_80387_constant_p (op1) == 0
20101 enum rtx_code new_code = ix86_fp_swap_condition (code);
20102 if (new_code != UNKNOWN)
20104 std::swap (op0, op1);
20110 op0 = force_reg (op_mode, op0);
20112 if (CONSTANT_P (op1))
20114 int tmp = standard_80387_constant_p (op1);
20116 op1 = validize_mem (force_const_mem (op_mode, op1));
20120 op1 = force_reg (op_mode, op1);
20123 op1 = force_reg (op_mode, op1);
20127 /* Try to rearrange the comparison to make it cheaper. */
20128 if (ix86_fp_comparison_cost (code)
20129 > ix86_fp_comparison_cost (swap_condition (code))
20130 && (REG_P (op1) || can_create_pseudo_p ()))
20132 std::swap (op0, op1);
20133 code = swap_condition (code);
20135 op0 = force_reg (op_mode, op0);
20143 /* Convert comparison codes we use to represent FP comparison to integer
20144 code that will result in proper branch. Return UNKNOWN if no such code
20148 ix86_fp_compare_code_to_integer (enum rtx_code code)
20177 /* Generate insn patterns to do a floating point compare of OPERANDS. */
20180 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20182 machine_mode fpcmp_mode, intcmp_mode;
20185 fpcmp_mode = ix86_fp_compare_mode (code);
20186 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20188 /* Do fcomi/sahf based test when profitable. */
20189 switch (ix86_fp_comparison_strategy (code))
20191 case IX86_FPCMP_COMI:
20192 intcmp_mode = fpcmp_mode;
20193 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20194 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20199 case IX86_FPCMP_SAHF:
20200 intcmp_mode = fpcmp_mode;
20201 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20202 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20206 scratch = gen_reg_rtx (HImode);
20207 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20208 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20211 case IX86_FPCMP_ARITH:
20212 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
20213 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20214 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20216 scratch = gen_reg_rtx (HImode);
20217 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
20219 /* In the unordered case, we have to check C2 for NaN's, which
20220 doesn't happen to work out to anything nice combination-wise.
20221 So do some bit twiddling on the value we've got in AH to come
20222 up with an appropriate set of condition codes. */
20224 intcmp_mode = CCNOmode;
20229 if (code == GT || !TARGET_IEEE_FP)
20231 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20236 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20237 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20238 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20239 intcmp_mode = CCmode;
20245 if (code == LT && TARGET_IEEE_FP)
20247 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20248 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20249 intcmp_mode = CCmode;
20254 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20260 if (code == GE || !TARGET_IEEE_FP)
20262 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20267 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20268 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20274 if (code == LE && TARGET_IEEE_FP)
20276 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20277 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20278 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20279 intcmp_mode = CCmode;
20284 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20290 if (code == EQ && TARGET_IEEE_FP)
20292 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20293 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20294 intcmp_mode = CCmode;
20299 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20305 if (code == NE && TARGET_IEEE_FP)
20307 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20308 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20314 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20320 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20324 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20329 gcc_unreachable ();
20337 /* Return the test that should be put into the flags user, i.e.
20338 the bcc, scc, or cmov instruction. */
20339 return gen_rtx_fmt_ee (code, VOIDmode,
20340 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Build (and emit any insns required for) the comparison RTX for
   OP0 <CODE> OP1 that a bcc/scc/cmov consumer can use.
   - CC-mode OP0: the flags already hold the comparison; just wrap it.
   - Scalar float OP0: dispatch to ix86_expand_fp_compare (decimal float
     is asserted away).
   - Otherwise: integer path via ix86_expand_int_compare.
   NOTE(review): this excerpt is non-contiguous (the embedded original
   line numbers jump), so the return-type line, braces and the final
   `return ret;` are not visible here.  */
20345 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20349 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
/* Flags register is already set up; only the test RTX is needed.  */
20350 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20352 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20354 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20355 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20358 ret = ix86_expand_int_compare (code, op0, op1);
/* Emit a conditional branch to LABEL taken when OP0 <CODE> OP1 holds.
   The simple path funnels through ix86_expand_compare and emits a
   pc = IF_THEN_ELSE jump; double-word integer modes are decomposed into
   word-sized compare+branch sequences (see below); CC-mode operands are
   asserted and fall through.
   NOTE(review): excerpt is non-contiguous — the dispatching switch on
   MODE, several braces and case labels are not visible here.  */
20364 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20366 machine_mode mode = GET_MODE (op0);
/* Simple case: one compare, one conditional jump to LABEL.  */
20378 tmp = ix86_expand_compare (code, op0, op1);
20379 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20380 gen_rtx_LABEL_REF (VOIDmode, label),
20382 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp))
20389 /* Expand DImode branch into multiple compare+branch. */
20392 rtx_code_label *label2;
20393 enum rtx_code code1, code2, code3;
20394 machine_mode submode;
/* Canonicalize so any constant ends up as the second operand.  */
20396 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20398 std::swap (op0, op1);
20399 code = swap_condition (code);
/* Split each double-word operand into its lo/hi word halves.  */
20402 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20403 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20405 submode = mode == DImode ? SImode : DImode;
20407 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20408 avoid two branches. This costs one extra insn, so disable when
20409 optimizing for size. */
20411 if ((code == EQ || code == NE)
20412 && (!optimize_insn_for_size_p ()
20413 || hi[1] == const0_rtx || lo[1] == const0_rtx))
/* XOR of matching halves is zero iff they are equal; skip the XOR
   entirely when the constant half is already zero.  */
20418 if (hi[1] != const0_rtx)
20419 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20420 NULL_RTX, 0, OPTAB_WIDEN);
20423 if (lo[1] != const0_rtx)
20424 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20425 NULL_RTX, 0, OPTAB_WIDEN);
20427 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20428 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse: the combined word is compared against zero.  */
20430 ix86_expand_branch (code, tmp, const0_rtx, label);
20434 /* Otherwise, if we are doing less-than or greater-or-equal-than,
20435 op1 is a constant and the low word is zero, then we can just
20436 examine the high word. Similarly for low word -1 and
20437 less-or-equal-than or greater-than. */
20439 if (CONST_INT_P (hi[1]))
20442 case LT: case LTU: case GE: case GEU:
20443 if (lo[1] == const0_rtx)
20445 ix86_expand_branch (code, hi[0], hi[1], label);
20449 case LE: case LEU: case GT: case GTU:
20450 if (lo[1] == constm1_rtx)
20452 ix86_expand_branch (code, hi[0], hi[1], label);
20460 /* Otherwise, we need two or three jumps. */
20462 label2 = gen_label_rtx ();
20465 code2 = swap_condition (code);
20466 code3 = unsigned_condition (code);
/* Map the requested code onto (code1, code2): strict hi-word tests,
   with code3 handling the lo-word (always unsigned).  UNKNOWN marks a
   branch that can be omitted.  */
20470 case LT: case GT: case LTU: case GTU:
20473 case LE: code1 = LT; code2 = GT; break;
20474 case GE: code1 = GT; code2 = LT; break;
20475 case LEU: code1 = LTU; code2 = GTU; break;
20476 case GEU: code1 = GTU; code2 = LTU; break;
20478 case EQ: code1 = UNKNOWN; code2 = NE; break;
20479 case NE: code2 = UNKNOWN; break;
20482 gcc_unreachable ();
20487 * if (hi(a) < hi(b)) goto true;
20488 * if (hi(a) > hi(b)) goto false;
20489 * if (lo(a) < lo(b)) goto true;
20493 if (code1 != UNKNOWN)
20494 ix86_expand_branch (code1, hi[0], hi[1], label);
20495 if (code2 != UNKNOWN)
20496 ix86_expand_branch (code2, hi[0], hi[1], label2);
/* Low words decide when the high words were equal.  */
20498 ix86_expand_branch (code3, lo[0], lo[1], label);
20500 if (code2 != UNKNOWN)
20501 emit_label (label2);
20506 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20511 /* Split branch based on floating point condition. */
/* Emit the jump insn implementing `if (OP1 <CODE> OP2) goto TARGET1;
   else goto TARGET2'.  One of the targets is expected to be pc_rtx
   (fall-through); when TARGET2 is not, the condition is reversed
   (unordered-aware) so the fall-through ends up second.  A branch
   probability note is attached when split_branch_probability is set.
   NOTE(review): excerpt is non-contiguous; the declarations of
   `condition' and `i' and some argument lines are not visible.  */
20513 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20514 rtx target1, rtx target2, rtx tmp)
20519 if (target2 != pc_rtx)
20521 std::swap (target1, target2);
/* Must keep NaN behavior intact, hence the _maybe_unordered variant.  */
20522 code = reverse_condition_maybe_unordered (code);
20525 condition = ix86_expand_fp_compare (code, op1, op2,
20528 i = emit_jump_insn (gen_rtx_SET
20530 gen_rtx_IF_THEN_ELSE (VOIDmode,
20531 condition, target1, target2)));
20532 if (split_branch_probability >= 0)
20533 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
/* Store the result of comparison OP0 <CODE> OP1 into the QImode
   register DEST (the scc/setcc expansion).  The comparison RTX from
   ix86_expand_compare is retagged to QImode and assigned directly.  */
20537 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20541 gcc_assert (GET_MODE (dest) == QImode);
20543 ret = ix86_expand_compare (code, op0, op1);
20544 PUT_MODE (ret, QImode);
20545 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20548 /* Expand comparison setting or clearing carry flag. Return true when
20549 successful and set pop for the operation. */
/* On success *POP holds an LTU/GEU test of the flags register, i.e. a
   comparison decided purely by the carry flag (usable by sbb/adc
   sequences).  Integer comparisons are rewritten (EQ->LTU against 1,
   GT->swap or op1+1, GE/LE against 0/-1 -> unsigned against the sign
   bit) until only LTU/GEU remain.
   NOTE(review): excerpt is non-contiguous; the `return false/true'
   statements and several braces are not visible here.  */
20551 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20553 machine_mode mode =
20554 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20556 /* Do not handle double-mode compares that go through special path. */
20557 if (mode == (TARGET_64BIT ? TImode : DImode))
20560 if (SCALAR_FLOAT_MODE_P (mode))
20563 rtx_insn *compare_seq;
20565 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20567 /* Shortcut: following common codes never translate
20568 into carry flag compares. */
20569 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20570 || code == ORDERED || code == UNORDERED)
20573 /* These comparisons require zero flag; swap operands so they won't. */
20574 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20575 && !TARGET_IEEE_FP)
20577 std::swap (op0, op1);
20578 code = swap_condition (code);
20581 /* Try to expand the comparison and verify that we end up with
20582 carry flag based comparison. This fails to be true only when
20583 we decide to expand comparison using arithmetic that is not
20584 too common scenario. */
20586 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20587 compare_seq = get_insns ();
20590 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20591 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20592 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20594 code = GET_CODE (compare_op);
/* Only carry-flag comparisons qualify; anything else is a failure.  */
20596 if (code != LTU && code != GEU)
20599 emit_insn (compare_seq);
20604 if (!INTEGRAL_MODE_P (mode))
20613 /* Convert a==0 into (unsigned)a<1. */
20616 if (op1 != const0_rtx)
20619 code = (code == EQ ? LTU : GEU);
20622 /* Convert a>b into b<a or a>=b-1. */
20625 if (CONST_INT_P (op1))
20627 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20628 /* Bail out on overflow. We still can swap operands but that
20629 would force loading of the constant into register. */
20630 if (op1 == const0_rtx
20631 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20633 code = (code == GTU ? GEU : LTU);
20637 std::swap (op0, op1);
20638 code = (code == GTU ? LTU : GEU);
20642 /* Convert a>=0 into (unsigned)a<0x80000000. */
/* DImode constants of this size can't be used here; reject it.  */
20645 if (mode == DImode || op1 != const0_rtx)
20647 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20648 code = (code == LT ? GEU : LTU);
20652 if (mode == DImode || op1 != constm1_rtx)
20654 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20655 code = (code == LE ? GEU : LTU);
20661 /* Swapping operands may cause constant to appear as first operand. */
20662 if (!nonimmediate_operand (op0, VOIDmode))
20664 if (!can_create_pseudo_p ())
20666 op0 = force_reg (mode, op0);
20668 *pop = ix86_expand_compare (code, op0, op1);
20669 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move OPERANDS[0] = OPERANDS[1] (a
   comparison of op0/op1) ? OPERANDS[2] : OPERANDS[3].  Tries branchless
   sequences first: sbb/adc tricks when both arms are constants, lea
   forms for small constant differences, and/or masking with one
   constant arm, and falls back to a real cmov (IF_THEN_ELSE) at the
   end.  Returns nonzero on success (return statements not visible in
   this excerpt).
   NOTE(review): excerpt is non-contiguous — braces, several condition
   lines and returns are missing between the visible lines.  */
20674 ix86_expand_int_movcc (rtx operands[])
20676 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20677 rtx_insn *compare_seq;
20679 machine_mode mode = GET_MODE (operands[0]);
20680 bool sign_bit_compare_p = false;
20681 rtx op0 = XEXP (operands[1], 0);
20682 rtx op1 = XEXP (operands[1], 1);
20684 if (GET_MODE (op0) == TImode
20685 || (GET_MODE (op0) == DImode
/* Record the compare sequence; it is re-emitted later only on the
   cmov fallback path.  */
20690 compare_op = ix86_expand_compare (code, op0, op1);
20691 compare_seq = get_insns ();
20694 compare_code = GET_CODE (compare_op);
/* x < 0 / x >= 0 / x > -1 / x <= -1 test only the sign bit.  */
20696 if ((op1 == const0_rtx && (code == GE || code == LT))
20697 || (op1 == constm1_rtx && (code == GT || code == LE)))
20698 sign_bit_compare_p = true;
20700 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20701 HImode insns, we'd be swallowed in word prefix ops. */
20703 if ((mode != HImode || TARGET_FAST_PREFIX)
20704 && (mode != (TARGET_64BIT ? TImode : DImode))
20705 && CONST_INT_P (operands[2])
20706 && CONST_INT_P (operands[3]))
/* Both arms are constants: ct = true value, cf = false value.  */
20708 rtx out = operands[0];
20709 HOST_WIDE_INT ct = INTVAL (operands[2]);
20710 HOST_WIDE_INT cf = INTVAL (operands[3]);
20711 HOST_WIDE_INT diff;
20714 /* Sign bit compares are better done using shifts than we do by using
20716 if (sign_bit_compare_p
20717 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20719 /* Detect overlap between destination and compare sources. */
20722 if (!sign_bit_compare_p)
20725 bool fpcmp = false;
20727 compare_code = GET_CODE (compare_op);
20729 flags = XEXP (compare_op, 0);
20731 if (GET_MODE (flags) == CCFPmode
20732 || GET_MODE (flags) == CCFPUmode)
20736 = ix86_fp_compare_code_to_integer (compare_code);
20739 /* To simplify rest of code, restrict to the GEU case. */
20740 if (compare_code == LTU)
20742 std::swap (ct, cf);
20743 compare_code = reverse_condition (compare_code);
20744 code = reverse_condition (code);
20749 PUT_CODE (compare_op,
20750 reverse_condition_maybe_unordered
20751 (GET_CODE (compare_op)));
20753 PUT_CODE (compare_op,
20754 reverse_condition (GET_CODE (compare_op)));
20758 if (reg_overlap_mentioned_p (out, op0)
20759 || reg_overlap_mentioned_p (out, op1))
20760 tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 from the carry flag (sbb reg,reg pattern).  */
20762 if (mode == DImode)
20763 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20765 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20766 flags, compare_op));
20770 if (code == GT || code == GE)
20771 code = reverse_condition (code);
20774 std::swap (ct, cf);
/* Sign-bit case: store-flag directly produces the 0/-1 mask.  */
20777 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20790 tmp = expand_simple_binop (mode, PLUS,
20792 copy_rtx (tmp), 1, OPTAB_DIRECT);
20803 tmp = expand_simple_binop (mode, IOR,
20805 copy_rtx (tmp), 1, OPTAB_DIRECT);
20807 else if (diff == -1 && ct)
20817 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20819 tmp = expand_simple_binop (mode, PLUS,
20820 copy_rtx (tmp), GEN_INT (cf),
20821 copy_rtx (tmp), 1, OPTAB_DIRECT);
20829 * andl cf - ct, dest
/* General constant/constant case: mask the 0/-1 value with (cf - ct)
   and add ct.  */
20839 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20842 tmp = expand_simple_binop (mode, AND,
20844 gen_int_mode (cf - ct, mode),
20845 copy_rtx (tmp), 1, OPTAB_DIRECT);
20847 tmp = expand_simple_binop (mode, PLUS,
20848 copy_rtx (tmp), GEN_INT (ct),
20849 copy_rtx (tmp), 1, OPTAB_DIRECT);
20852 if (!rtx_equal_p (tmp, out))
20853 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
/* Carry-flag trick did not apply: try to normalize diff = ct - cf by
   reversing the condition when profitable.  */
20860 machine_mode cmp_mode = GET_MODE (op0);
20861 enum rtx_code new_code;
20863 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20865 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20867 /* We may be reversing unordered compare to normal compare, that
20868 is not valid in general (we may convert non-trapping condition
20869 to trapping one), however on i386 we currently emit all
20870 comparisons unordered. */
20871 new_code = reverse_condition_maybe_unordered (code);
20874 new_code = ix86_reverse_condition (code, cmp_mode);
20875 if (new_code != UNKNOWN)
20877 std::swap (ct, cf);
20883 compare_code = UNKNOWN;
20884 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20885 && CONST_INT_P (op1))
20887 if (op1 == const0_rtx
20888 && (code == LT || code == GE))
20889 compare_code = code;
20890 else if (op1 == constm1_rtx)
20894 else if (code == GT)
20899 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20900 if (compare_code != UNKNOWN
20901 && GET_MODE (op0) == GET_MODE (out)
20902 && (cf == -1 || ct == -1))
20904 /* If lea code below could be used, only optimize
20905 if it results in a 2 insn sequence. */
20907 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20908 || diff == 3 || diff == 5 || diff == 9)
20909 || (compare_code == LT && ct == -1)
20910 || (compare_code == GE && cf == -1))
20913 * notl op1 (if necessary)
20921 code = reverse_condition (code);
20924 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20926 out = expand_simple_binop (mode, IOR,
20928 out, 1, OPTAB_DIRECT);
20929 if (out != operands[0])
20930 emit_move_insn (operands[0], out);
/* lea path: diff in {1,2,3,4,5,8,9} can be synthesized by one lea
   from the 0/1 store-flag result.  */
20937 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20938 || diff == 3 || diff == 5 || diff == 9)
20939 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20941 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20947 * lea cf(dest*(ct-cf)),dest
20951 * This also catches the degenerate setcc-only case.
20957 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20960 /* On x86_64 the lea instruction operates on Pmode, so we need
20961 to get arithmetics done in proper mode to match. */
20963 tmp = copy_rtx (out);
20967 out1 = copy_rtx (out);
20968 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20972 tmp = gen_rtx_PLUS (mode, tmp, out1);
20978 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20981 if (!rtx_equal_p (tmp, out))
20984 out = force_operand (tmp, copy_rtx (out));
20986 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20988 if (!rtx_equal_p (out, operands[0]))
20989 emit_move_insn (operands[0], copy_rtx (out));
20995 * General case: Jumpful:
20996 * xorl dest,dest cmpl op1, op2
20997 * cmpl op1, op2 movl ct, dest
20998 * setcc dest jcc 1f
20999 * decl dest movl cf, dest
21000 * andl (cf-ct),dest 1:
21003 * Size 20. Size 14.
21005 * This is reasonably steep, but branch mispredict costs are
21006 * high on modern cpus, so consider failing only if optimizing
21010 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21011 && BRANCH_COST (optimize_insn_for_speed_p (),
21016 machine_mode cmp_mode = GET_MODE (op0);
21017 enum rtx_code new_code;
21019 if (SCALAR_FLOAT_MODE_P (cmp_mode))
21021 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
21023 /* We may be reversing unordered compare to normal compare,
21024 that is not valid in general (we may convert non-trapping
21025 condition to trapping one), however on i386 we currently
21026 emit all comparisons unordered. */
21027 new_code = reverse_condition_maybe_unordered (code);
21031 new_code = ix86_reverse_condition (code, cmp_mode);
21032 if (compare_code != UNKNOWN && new_code != UNKNOWN)
21033 compare_code = reverse_condition (compare_code);
21036 if (new_code != UNKNOWN)
21044 if (compare_code != UNKNOWN)
21046 /* notl op1 (if needed)
21051 For x < 0 (resp. x <= -1) there will be no notl,
21052 so if possible swap the constants to get rid of the
21054 True/false will be -1/0 while code below (store flag
21055 followed by decrement) is 0/-1, so the constants need
21056 to be exchanged once more. */
21058 if (compare_code == GE || !cf)
21060 code = reverse_condition (code);
21064 std::swap (ct, cf);
21066 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
/* Branchless general case: setcc (0/1), decrement to 0/-1, mask with
   (cf - ct), then add ct.  */
21070 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21072 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
21074 copy_rtx (out), 1, OPTAB_DIRECT);
21077 out = expand_simple_binop (mode, AND, copy_rtx (out),
21078 gen_int_mode (cf - ct, mode),
21079 copy_rtx (out), 1, OPTAB_DIRECT);
21081 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
21082 copy_rtx (out), 1, OPTAB_DIRECT);
21083 if (!rtx_equal_p (out, operands[0]))
21084 emit_move_insn (operands[0], copy_rtx (out));
21090 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21092 /* Try a few things more with specific constants and a variable. */
21095 rtx var, orig_out, out, tmp;
21097 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
21100 /* If one of the two operands is an interesting constant, load a
21101 constant with the above and mask it in with a logical operation. */
21103 if (CONST_INT_P (operands[2]))
21106 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
21107 operands[3] = constm1_rtx, op = and_optab;
21108 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
21109 operands[3] = const0_rtx, op = ior_optab;
21113 else if (CONST_INT_P (operands[3]))
21116 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21117 operands[2] = constm1_rtx, op = and_optab;
/* NOTE(review): by symmetry with the operands[2] branch above, the
   second conjunct here should presumably test operands[2] !=
   const0_rtx; `operands[3] != const0_rtx' is vacuously true when
   INTVAL (operands[3]) == -1 — verify against upstream GCC.  */
21118 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
21119 operands[2] = const0_rtx, op = ior_optab;
21126 orig_out = operands[0];
21127 tmp = gen_reg_rtx (mode);
21130 /* Recurse to get the constant loaded. */
21131 if (ix86_expand_int_movcc (operands) == 0)
21134 /* Mask in the interesting variable. */
21135 out = expand_binop (mode, op, var, tmp, orig_out, 0,
21137 if (!rtx_equal_p (out, orig_out))
21138 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21144 * For comparison with above,
/* Fallback: real cmov.  Force operands into registers as the cmov
   patterns require, re-emit the saved compare, emit IF_THEN_ELSE.  */
21154 if (! nonimmediate_operand (operands[2], mode))
21155 operands[2] = force_reg (mode, operands[2]);
21156 if (! nonimmediate_operand (operands[3], mode))
21157 operands[3] = force_reg (mode, operands[3]);
21159 if (! register_operand (operands[2], VOIDmode)
21161 || ! register_operand (operands[3], VOIDmode)))
21162 operands[2] = force_reg (mode, operands[2]);
21165 && ! register_operand (operands[3], VOIDmode))
21166 operands[3] = force_reg (mode, operands[3]);
21168 emit_insn (compare_seq);
21169 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21170 gen_rtx_IF_THEN_ELSE (mode,
21171 compare_op, operands[2],
21176 /* Swap, force into registers, or otherwise massage the two operands
21177 to an sse comparison with a mask result. Thus we differ a bit from
21178 ix86_prepare_fp_compare_args which expects to produce a flags result.
21180 The DEST operand exists to help determine whether to commute commutative
21181 operators. The POP0/POP1 operands are updated in place. The new
21182 comparison code is returned, or UNKNOWN if not implementable. */
21184 static enum rtx_code
21185 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21186 rtx *pop0, rtx *pop1)
/* NOTE(review): excerpt is non-contiguous; the switch dispatching on
   CODE and several case labels/returns are not visible here.  */
21192 /* AVX supports all the needed comparisons. */
21195 /* We have no LTGT as an operator. We could implement it with
21196 NE & ORDERED, but this requires an extra temporary. It's
21197 not clear that it's worth it. */
21204 /* These are supported directly. */
21211 /* AVX has 3 operand comparisons, no need to swap anything. */
21214 /* For commutative operators, try to canonicalize the destination
21215 operand to be first in the comparison - this helps reload to
21216 avoid extra moves. */
21217 if (!dest || !rtx_equal_p (dest, *pop1))
21225 /* These are not supported directly before AVX, and furthermore
21226 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
21227 comparison operands to transform into something that is
/* Swap both the operand RTXs and the condition.  */
21229 std::swap (*pop0, *pop1);
21230 code = swap_condition (code);
21234 gcc_unreachable ();
21240 /* Detect conditional moves that exactly match min/max operational
21241 semantics. Note that this is IEEE safe, as long as we don't
21242 interchange the operands.
21244 Returns FALSE if this conditional move doesn't match a MIN/MAX,
21245 and TRUE if the operation is successful and instructions are emitted. */
21248 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21249 rtx cmp_op1, rtx if_true, rtx if_false)
/* NOTE(review): excerpt is non-contiguous; the initial handling of
   CODE == LT and the is_min assignments are not all visible.  */
/* UNGE behaves like LT with the arms exchanged.  */
21257 else if (code == UNGE)
21258 std::swap (if_true, if_false);
/* The compare operands must be exactly the two arms (in either
   order) for this to be a min or a max.  */
21262 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21264 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21269 mode = GET_MODE (dest);
21271 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21272 but MODE may be a vector mode and thus not appropriate. */
21273 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* Strict IEEE semantics: use the IEEE min/max unspec patterns.  */
21275 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21278 if_true = force_reg (mode, if_true);
21279 v = gen_rtvec (2, if_true, if_false);
21280 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Relaxed math: a plain SMIN/SMAX rtx is enough.  */
21284 code = is_min ? SMIN : SMAX;
21285 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21288 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
21292 /* Expand an sse vector comparison. Return the register with the result. */
/* DEST receives the comparison mask (or a fresh register when DEST
   overlaps an input or has the wrong mode).  For 512-bit operand
   vectors the result is an AVX-512 integer mask register whose mode is
   an integer mode with one bit per element.  */
21295 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21296 rtx op_true, rtx op_false)
21298 machine_mode mode = GET_MODE (dest);
21299 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21301 /* In general case result of comparison can differ from operands' type. */
21302 machine_mode cmp_mode;
21304 /* In AVX512F the result of comparison is an integer mask. */
21305 bool maskcmp = false;
21308 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
/* One mask bit per vector element.  */
21310 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21311 gcc_assert (cmp_mode != BLKmode);
21316 cmp_mode = cmp_ops_mode;
21319 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21320 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21321 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
/* Don't clobber a DEST that is also consumed later as an arm.  */
21324 || reg_overlap_mentioned_p (dest, op_true)
21325 || reg_overlap_mentioned_p (dest, op_false))
21326 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21328 /* Compare patterns for int modes are unspec in AVX512F only. */
21329 if (maskcmp && (code == GT || code == EQ))
21331 rtx (*gen)(rtx, rtx, rtx);
21333 switch (cmp_ops_mode)
21336 gcc_assert (TARGET_AVX512BW);
21337 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
21340 gcc_assert (TARGET_AVX512BW);
21341 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
21344 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21347 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21355 emit_insn (gen (dest, cmp_op0, cmp_op1));
/* Generic path: a bare comparison rtx the movmask patterns accept.  */
21359 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21361 if (cmp_mode != mode && !maskcmp)
21363 x = force_reg (cmp_ops_mode, x);
21364 convert_move (dest, x, false);
21367 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21372 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21373 operations. This is used for both scalar and vector conditional moves. */
/* Strategy, in order of preference as the visible branches show:
   degenerate arms (all-ones/zero) collapse to a move/AND/IOR of the
   mask; XOP and SSE4.1+/AVX/AVX-512 get their native blend insns;
   otherwise the classic (cmp & t) | (~cmp & f) three-op fallback.
   NOTE(review): excerpt is non-contiguous; several guarding conditions
   (e.g. the TARGET_* tests picking each blend case) are not visible.  */
21376 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21378 machine_mode mode = GET_MODE (dest);
21379 machine_mode cmpmode = GET_MODE (cmp);
21381 /* In AVX512F the result of comparison is an integer mask. */
21382 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
/* cmp is already exactly the desired -1/0 pattern: plain move.  */
21386 if (vector_all_ones_operand (op_true, mode)
21387 && rtx_equal_p (op_false, CONST0_RTX (mode))
21390 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
21392 else if (op_false == CONST0_RTX (mode)
21395 op_true = force_reg (mode, op_true);
21396 x = gen_rtx_AND (mode, cmp, op_true);
21397 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21399 else if (op_true == CONST0_RTX (mode)
21402 op_false = force_reg (mode, op_false);
21403 x = gen_rtx_NOT (mode, cmp);
21404 x = gen_rtx_AND (mode, x, op_false);
21405 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21407 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21410 op_false = force_reg (mode, op_false);
21411 x = gen_rtx_IOR (mode, cmp, op_false);
21412 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21414 else if (TARGET_XOP
/* XOP vpcmov takes the condition as a real IF_THEN_ELSE operand.  */
21417 op_true = force_reg (mode, op_true);
21419 if (!nonimmediate_operand (op_false, mode))
21420 op_false = force_reg (mode, op_false);
21422 emit_insn (gen_rtx_SET (mode, dest,
21423 gen_rtx_IF_THEN_ELSE (mode, cmp,
/* Blend-instruction path: pick a generator per mode; d is the blend
   destination, possibly a lowpart-retyped temporary.  */
21429 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21432 if (!nonimmediate_operand (op_true, mode))
21433 op_true = force_reg (mode, op_true);
21435 op_false = force_reg (mode, op_false);
21441 gen = gen_sse4_1_blendvps;
21445 gen = gen_sse4_1_blendvpd;
21453 gen = gen_sse4_1_pblendvb;
/* pblendvb only exists for V16QI; retype other narrow modes.  */
21454 if (mode != V16QImode)
21455 d = gen_reg_rtx (V16QImode);
21456 op_false = gen_lowpart (V16QImode, op_false);
21457 op_true = gen_lowpart (V16QImode, op_true);
21458 cmp = gen_lowpart (V16QImode, cmp);
21463 gen = gen_avx_blendvps256;
21467 gen = gen_avx_blendvpd256;
21475 gen = gen_avx2_pblendvb;
21476 if (mode != V32QImode)
21477 d = gen_reg_rtx (V32QImode);
21478 op_false = gen_lowpart (V32QImode, op_false);
21479 op_true = gen_lowpart (V32QImode, op_true);
21480 cmp = gen_lowpart (V32QImode, cmp);
21485 gen = gen_avx512bw_blendmv64qi;
21488 gen = gen_avx512bw_blendmv32hi;
21491 gen = gen_avx512f_blendmv16si;
21494 gen = gen_avx512f_blendmv8di;
21497 gen = gen_avx512f_blendmv8df;
21500 gen = gen_avx512f_blendmv16sf;
/* Note argument order: blend insns take (false, true, mask).  */
21509 emit_insn (gen (d, op_false, op_true, cmp));
21511 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
/* Fallback: DEST = (op_true & cmp) | (op_false & ~cmp).  */
21515 op_true = force_reg (mode, op_true);
21517 t2 = gen_reg_rtx (mode);
21519 t3 = gen_reg_rtx (mode);
21523 x = gen_rtx_AND (mode, op_true, cmp);
21524 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21526 x = gen_rtx_NOT (mode, cmp);
21527 x = gen_rtx_AND (mode, x, op_false);
21528 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21530 x = gen_rtx_IOR (mode, t3, t2);
21531 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21536 /* Expand a floating-point conditional move. Return true if successful. */
/* OPERANDS[0] = OPERANDS[1](op0,op1) ? OPERANDS[2] : OPERANDS[3].
   SSE math modes go through the mask-based SSE cmp/movcc helpers
   (with a min/max shortcut); otherwise an x87 fcmov IF_THEN_ELSE is
   emitted, synthesizing a setcc+NE test when the condition is not
   directly representable by fcmov.  */
21539 ix86_expand_fp_movcc (rtx operands[])
21541 machine_mode mode = GET_MODE (operands[0]);
21542 enum rtx_code code = GET_CODE (operands[1]);
21543 rtx tmp, compare_op;
21544 rtx op0 = XEXP (operands[1], 0);
21545 rtx op1 = XEXP (operands[1], 1);
21547 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21549 machine_mode cmode;
21551 /* Since we've no cmove for sse registers, don't force bad register
21552 allocation just to gain access to it. Deny movcc when the
21553 comparison mode doesn't match the move mode. */
21554 cmode = GET_MODE (op0);
21555 if (cmode == VOIDmode)
21556 cmode = GET_MODE (op1);
21560 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21561 if (code == UNKNOWN)
/* min/max shortcut; on success insns are already emitted.  */
21564 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21565 operands[2], operands[3]))
21568 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21569 operands[2], operands[3]);
21570 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21574 if (GET_MODE (op0) == TImode
21575 || (GET_MODE (op0) == DImode
21579 /* The floating point conditional move instructions don't directly
21580 support conditions resulting from a signed integer comparison. */
21582 compare_op = ix86_expand_compare (code, op0, op1);
21583 if (!fcmov_comparison_operator (compare_op, VOIDmode))
/* Materialize the condition into a QImode flag, then test it != 0,
   which fcmov can handle.  */
21585 tmp = gen_reg_rtx (QImode);
21586 ix86_expand_setcc (tmp, code, op0, op1);
21588 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21591 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21592 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21593 operands[2], operands[3])));
21598 /* Expand a floating-point vector conditional move; a vcond operation
21599 rather than a movcc operation. */
/* OPERANDS[0] = OPERANDS[3](operands[4],operands[5]) ? OPERANDS[1]
   : OPERANDS[2].  LTGT/UNEQ (the visible switch cases) are synthesized
   from two SSE compares combined with a logic op; everything else uses
   one SSE compare plus an SSE movcc, with the min/max shortcut tried
   first.  */
21602 ix86_expand_fp_vcond (rtx operands[])
21604 enum rtx_code code = GET_CODE (operands[3]);
21607 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21608 &operands[4], &operands[5]);
21609 if (code == UNKNOWN)
21612 switch (GET_CODE (operands[3]))
/* LTGT = ORDERED & NE (two masks combined below).  */
21615 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21616 operands[5], operands[0], operands[0]);
21617 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21618 operands[5], operands[1], operands[2]);
/* UNEQ = UNORDERED | EQ.  */
21622 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21623 operands[5], operands[0], operands[0]);
21624 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21625 operands[5], operands[1], operands[2]);
21629 gcc_unreachable ();
21631 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21633 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21637 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21638 operands[5], operands[1], operands[2]))
21641 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21642 operands[1], operands[2]);
21643 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21647 /* Expand a signed/unsigned integral vector conditional move. */
/* OPERANDS[0] = OPERANDS[3](operands[4],operands[5]) ? OPERANDS[1]
   : OPERANDS[2].  Visible strategy: first try a pure shift for the
   x<0 ? -1/1 : 0 idioms; otherwise canonicalize the comparison to
   EQ/GT/GTU (XOP skips this), emulate unsigned compares by sign-bit
   subtraction or saturating subtraction, then expand via
   ix86_expand_sse_cmp + ix86_expand_sse_movcc, allowing the compare
   mode to differ from the data mode.
   NOTE(review): excerpt is non-contiguous; switch headers, several
   TARGET_* guards and the `negate' updates are not visible.  */
21650 ix86_expand_int_vcond (rtx operands[])
21652 machine_mode data_mode = GET_MODE (operands[0]);
21653 machine_mode mode = GET_MODE (operands[4]);
21654 enum rtx_code code = GET_CODE (operands[3]);
21655 bool negate = false;
21658 cop0 = operands[4];
21659 cop1 = operands[5];
21661 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21662 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21663 if ((code == LT || code == GE)
21664 && data_mode == mode
21665 && cop1 == CONST0_RTX (mode)
21666 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21667 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21668 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21669 && (GET_MODE_SIZE (data_mode) == 16
21670 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21672 rtx negop = operands[2 - (code == LT)];
21673 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
/* x<0 ? 1 : 0 — logical shift brings the sign bit down to bit 0.  */
21674 if (negop == CONST1_RTX (data_mode))
21676 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21677 operands[0], 1, OPTAB_DIRECT);
21678 if (res != operands[0])
21679 emit_move_insn (operands[0], res);
/* x<0 ? -1 : 0 — arithmetic shift smears the sign bit.  */
21682 else if (GET_MODE_INNER (data_mode) != DImode
21683 && vector_all_ones_operand (negop, data_mode))
21685 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21686 operands[0], 0, OPTAB_DIRECT);
21687 if (res != operands[0])
21688 emit_move_insn (operands[0], res);
21693 if (!nonimmediate_operand (cop1, mode))
21694 cop1 = force_reg (mode, cop1);
21695 if (!general_operand (operands[1], data_mode))
21696 operands[1] = force_reg (data_mode, operands[1]);
21697 if (!general_operand (operands[2], data_mode))
21698 operands[2] = force_reg (data_mode, operands[2]);
21700 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21702 && (mode == V16QImode || mode == V8HImode
21703 || mode == V4SImode || mode == V2DImode))
21707 /* Canonicalize the comparison to EQ, GT, GTU. */
21718 code = reverse_condition (code);
21724 code = reverse_condition (code);
21730 std::swap (cop0, cop1);
21731 code = swap_condition (code);
21735 gcc_unreachable ();
21738 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21739 if (mode == V2DImode)
21744 /* SSE4.1 supports EQ. */
21745 if (!TARGET_SSE4_1)
21751 /* SSE4.2 supports GT/GTU. */
21752 if (!TARGET_SSE4_2)
21757 gcc_unreachable ();
21761 /* Unsigned parallel compare is not supported by the hardware.
21762 Play some tricks to turn this into a signed comparison
21766 cop0 = force_reg (mode, cop0);
21778 rtx (*gen_sub3) (rtx, rtx, rtx);
21782 case V16SImode: gen_sub3 = gen_subv16si3; break;
21783 case V8DImode: gen_sub3 = gen_subv8di3; break;
21784 case V8SImode: gen_sub3 = gen_subv8si3; break;
21785 case V4DImode: gen_sub3 = gen_subv4di3; break;
21786 case V4SImode: gen_sub3 = gen_subv4si3; break;
21787 case V2DImode: gen_sub3 = gen_subv2di3; break;
21789 gcc_unreachable ();
21791 /* Subtract (-(INT MAX) - 1) from both operands to make
/* Biasing both operands by the sign-bit mask turns GTU into GT.  */
21793 mask = ix86_build_signbit_mask (mode, true, false);
21794 t1 = gen_reg_rtx (mode);
21795 emit_insn (gen_sub3 (t1, cop0, mask));
21797 t2 = gen_reg_rtx (mode);
21798 emit_insn (gen_sub3 (t2, cop1, mask));
21812 /* Perform a parallel unsigned saturating subtraction. */
/* After us_minus, (cop0 - cop1) != 0 iff cop0 >u cop1; compare the
   result against zero.  */
21813 x = gen_reg_rtx (mode);
21814 emit_insn (gen_rtx_SET (VOIDmode, x,
21815 gen_rtx_US_MINUS (mode, cop0, cop1)));
21818 cop1 = CONST0_RTX (mode);
21824 gcc_unreachable ();
21829 /* Allow the comparison to be done in one mode, but the movcc to
21830 happen in another mode. */
21831 if (data_mode == mode)
/* `negate' flips which arm is selected when the condition had to be
   inverted during canonicalization.  */
21833 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21834 operands[1+negate], operands[2-negate]);
21838 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21839 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21840 operands[1+negate], operands[2-negate]);
21841 if (GET_MODE (x) == mode)
21842 x = gen_lowpart (data_mode, x);
21845 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21846 operands[2-negate]);
21850 /* AVX512F does support 64-byte integer vector operations,
21851 thus the longest vector we are faced with is V64QImode. */
21852 #define MAX_VECT_LEN 64
/* Descriptor for a (constant) vector permutation to be expanded.
   NOTE(review): this excerpt is non-contiguous -- the struct's opening/closing
   braces and any further members fall in elided lines.  */
21854 struct expand_vec_perm_d
/* Destination and the one or two source operands of the permutation.  */
21856 rtx target, op0, op1;
/* Element selector, one byte per result element; MAX_VECT_LEN (64) bounds
   the longest supported vector (V64QImode under AVX512F per the comment
   above the #define).  */
21857 unsigned char perm[MAX_VECT_LEN];
/* Vector machine mode of the operands.  */
21858 machine_mode vmode;
/* Number of elements actually used in PERM.  */
21859 unsigned char nelt;
/* True when op0 == op1, i.e. a single-input shuffle.  */
21860 bool one_operand_p;
/* Try to expand a permutation using the AVX-512 two-source variable
   permute (VPERMI2x) instruction family.  Selects a gen_* insn emitter
   keyed on the vector mode and the available ISA level; returns without
   emitting when no suitable insn exists for the target (the exact
   return-value plumbing is in elided lines of this excerpt).
   NOTE(review): excerpt is non-contiguous -- the switch skeleton, braces
   and early-return paths around these lines are not visible.  */
21865 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21866 struct expand_vec_perm_d *d)
21868 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21869 expander, so args are either in d, or in op0, op1 etc. */
21870 machine_mode mode = GET_MODE (d ? d->op0 : op0);
/* MASKMODE tracks the integer mode of the selector; it only diverges from
   MODE for the float-vector cases below.  */
21871 machine_mode maskmode = mode;
21872 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
/* Per-mode insn selection; each case requires the matching ISA subset
   (VL = 128/256-bit forms, BW = byte/word element forms, VBMI = V64QI).  */
21877 if (TARGET_AVX512VL && TARGET_AVX512BW)
21878 gen = gen_avx512vl_vpermi2varv8hi3;
21881 if (TARGET_AVX512VL && TARGET_AVX512BW)
21882 gen = gen_avx512vl_vpermi2varv16hi3;
21885 if (TARGET_AVX512VBMI)
21886 gen = gen_avx512bw_vpermi2varv64qi3;
21889 if (TARGET_AVX512BW)
21890 gen = gen_avx512bw_vpermi2varv32hi3;
21893 if (TARGET_AVX512VL)
21894 gen = gen_avx512vl_vpermi2varv4si3;
21897 if (TARGET_AVX512VL)
21898 gen = gen_avx512vl_vpermi2varv8si3;
21901 if (TARGET_AVX512F)
21902 gen = gen_avx512f_vpermi2varv16si3;
/* For float element modes the permute insn still takes an integer
   selector, hence the separate MASKMODE.  */
21905 if (TARGET_AVX512VL)
21907 gen = gen_avx512vl_vpermi2varv4sf3;
21908 maskmode = V4SImode;
21912 if (TARGET_AVX512VL)
21914 gen = gen_avx512vl_vpermi2varv8sf3;
21915 maskmode = V8SImode;
21919 if (TARGET_AVX512F)
21921 gen = gen_avx512f_vpermi2varv16sf3;
21922 maskmode = V16SImode;
21926 if (TARGET_AVX512VL)
21927 gen = gen_avx512vl_vpermi2varv2di3;
21930 if (TARGET_AVX512VL)
21931 gen = gen_avx512vl_vpermi2varv4di3;
21934 if (TARGET_AVX512F)
21935 gen = gen_avx512f_vpermi2varv8di3;
21938 if (TARGET_AVX512VL)
21940 gen = gen_avx512vl_vpermi2varv2df3;
21941 maskmode = V2DImode;
21945 if (TARGET_AVX512VL)
21947 gen = gen_avx512vl_vpermi2varv4df3;
21948 maskmode = V4DImode;
21952 if (TARGET_AVX512F)
21954 gen = gen_avx512f_vpermi2varv8df3;
21955 maskmode = V8DImode;
21965 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21966 expander, so args are either in d, or in op0, op1 etc. */
/* Constant-expander path: build the selector CONST_VECTOR from d->perm.
   (The declaration of VEC and the branch joining the two paths are in
   elided lines.)  */
21970 target = d->target;
21973 for (int i = 0; i < d->nelt; ++i)
21974 vec[i] = GEN_INT (d->perm[i]);
21975 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
/* Emit the selected VPERMI2 insn; the selector must live in a register.  */
21978 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
21982 /* Expand a variable vector permutation. */
/* operands[0] = target, operands[1..2] = sources, operands[3] = variable
   selector.  Tries VPERMI2 first, then dispatches on mode/ISA: AVX2
   VPERMD/VPERMPS paths, the AVX2 V32QI double-pshufb trick, and finally
   the 128-bit SSSE3/XOP fallback.
   NOTE(review): excerpt is non-contiguous -- switch/else skeletons,
   returns and some declarations (w, e, i, xops, push) are elided.  */
21985 ix86_expand_vec_perm (rtx operands[])
21987 rtx target = operands[0];
21988 rtx op0 = operands[1];
21989 rtx op1 = operands[2];
21990 rtx mask = operands[3];
21991 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21992 machine_mode mode = GET_MODE (op0);
21993 machine_mode maskmode = GET_MODE (mask);
21995 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21997 /* Number of elements in the vector. */
21998 w = GET_MODE_NUNITS (mode);
/* E is the element size in bytes.  */
21999 e = GET_MODE_UNIT_SIZE (mode);
22000 gcc_assert (w <= 64);
/* Prefer the single-insn AVX-512 VPERMI2 expansion when available.  */
22002 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
22007 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
22009 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
22010 an constant shuffle operand. With a tiny bit of effort we can
22011 use VPERMD instead. A re-interpretation stall for V4DFmode is
22012 unfortunate but there's no avoiding it.
22013 Similarly for V16HImode we don't have instructions for variable
22014 shuffling, while for V32QImode we can use after preparing suitable
22015 masks vpshufb; vpshufb; vpermq; vpor. */
22017 if (mode == V16HImode)
22019 maskmode = mode = V32QImode;
22025 maskmode = mode = V8SImode;
22029 t1 = gen_reg_rtx (maskmode);
22031 /* Replicate the low bits of the V4DImode mask into V8SImode:
22033 t1 = { A A B B C C D D }. */
22034 for (i = 0; i < w / 2; ++i)
22035 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
22036 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22037 vt = force_reg (maskmode, vt);
22038 mask = gen_lowpart (maskmode, mask);
22039 if (maskmode == V8SImode)
22040 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
22042 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
22044 /* Multiply the shuffle indicies by two. */
22045 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
22048 /* Add one to the odd shuffle indicies:
22049 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
22050 for (i = 0; i < w / 2; ++i)
22052 vec[i * 2] = const0_rtx;
22053 vec[i * 2 + 1] = const1_rtx;
22055 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22056 vt = validize_mem (force_const_mem (maskmode, vt));
22057 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
22060 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
22061 operands[3] = mask = t1;
22062 target = gen_reg_rtx (mode);
22063 op0 = gen_lowpart (mode, op0);
22064 op1 = gen_lowpart (mode, op1);
/* --- AVX2 V8SImode path (VPERMD).  --- */
22070 /* The VPERMD and VPERMPS instructions already properly ignore
22071 the high bits of the shuffle elements. No need for us to
22072 perform an AND ourselves. */
22073 if (one_operand_shuffle)
22075 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
22076 if (target != operands[0])
22077 emit_move_insn (operands[0],
22078 gen_lowpart (GET_MODE (operands[0]), target));
/* Two-operand: permute each source, then merge (merge code elided).  */
22082 t1 = gen_reg_rtx (V8SImode);
22083 t2 = gen_reg_rtx (V8SImode);
22084 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
22085 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
/* --- AVX2 V8SFmode path (VPERMPS); selector viewed as V8SI.  --- */
22091 mask = gen_lowpart (V8SImode, mask);
22092 if (one_operand_shuffle)
22093 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
22096 t1 = gen_reg_rtx (V8SFmode);
22097 t2 = gen_reg_rtx (V8SFmode);
22098 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
22099 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
22105 /* By combining the two 128-bit input vectors into one 256-bit
22106 input vector, we can use VPERMD and VPERMPS for the full
22107 two-operand shuffle. */
22108 t1 = gen_reg_rtx (V8SImode);
22109 t2 = gen_reg_rtx (V8SImode);
22110 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
22111 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22112 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
22113 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
22117 t1 = gen_reg_rtx (V8SFmode);
22118 t2 = gen_reg_rtx (V8SImode);
22119 mask = gen_lowpart (V4SImode, mask);
22120 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
22121 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22122 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
22123 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
/* --- AVX2 V32QImode path: VPSHUFB only shuffles within 128-bit lanes,
   so build two lane-aware selector masks and combine two pshufb
   results.  --- */
22127 t1 = gen_reg_rtx (V32QImode);
22128 t2 = gen_reg_rtx (V32QImode);
22129 t3 = gen_reg_rtx (V32QImode);
22130 vt2 = GEN_INT (-128);
22131 for (i = 0; i < 32; i++)
22133 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22134 vt = force_reg (V32QImode, vt);
22135 for (i = 0; i < 32; i++)
22136 vec[i] = i < 16 ? vt2 : const0_rtx;
22137 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22138 vt2 = force_reg (V32QImode, vt2);
22139 /* From mask create two adjusted masks, which contain the same
22140 bits as mask in the low 7 bits of each vector element.
22141 The first mask will have the most significant bit clear
22142 if it requests element from the same 128-bit lane
22143 and MSB set if it requests element from the other 128-bit lane.
22144 The second mask will have the opposite values of the MSB,
22145 and additionally will have its 128-bit lanes swapped.
22146 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22147 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
22148 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
22149 stands for other 12 bytes. */
22150 /* The bit whether element is from the same lane or the other
22151 lane is bit 4, so shift it up by 3 to the MSB position. */
22152 t5 = gen_reg_rtx (V4DImode);
22153 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22155 /* Clear MSB bits from the mask just in case it had them set. */
22156 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask))
22157 /* After this t1 will have MSB set for elements from other lane. */
22158 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22159 /* Clear bits other than MSB. */
22160 emit_insn (gen_andv32qi3 (t1, t1, vt));
22161 /* Or in the lower bits from mask into t3. */
22162 emit_insn (gen_iorv32qi3 (t3, t1, t2));
22163 /* And invert MSB bits in t1, so MSB is set for elements from the same
22165 emit_insn (gen_xorv32qi3 (t1, t1, vt));
22166 /* Swap 128-bit lanes in t3. */
22167 t6 = gen_reg_rtx (V4DImode);
22168 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22169 const2_rtx, GEN_INT (3),
22170 const0_rtx, const1_rtx));
22171 /* And or in the lower bits from mask into t1. */
22172 emit_insn (gen_iorv32qi3 (t1, t1, t2));
22173 if (one_operand_shuffle)
22175 /* Each of these shuffles will put 0s in places where
22176 element from the other 128-bit lane is needed, otherwise
22177 will shuffle in the requested value. */
22178 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22179 gen_lowpart (V32QImode, t6)));
22180 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22181 /* For t3 the 128-bit lanes are swapped again. */
22182 t7 = gen_reg_rtx (V4DImode);
22183 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22184 const2_rtx, GEN_INT (3),
22185 const0_rtx, const1_rtx));
22186 /* And oring both together leads to the result. */
22187 emit_insn (gen_iorv32qi3 (target, t1,
22188 gen_lowpart (V32QImode, t7)));
22189 if (target != operands[0])
22190 emit_move_insn (operands[0],
22191 gen_lowpart (GET_MODE (operands[0]), target));
22195 t4 = gen_reg_rtx (V32QImode);
22196 /* Similarly to the above one_operand_shuffle code,
22197 just for repeated twice for each operand. merge_two:
22198 code will merge the two results together. */
22199 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22200 gen_lowpart (V32QImode, t6)));
22201 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22202 gen_lowpart (V32QImode, t6)));
22203 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22204 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22205 t7 = gen_reg_rtx (V4DImode);
22206 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22207 const2_rtx, GEN_INT (3),
22208 const0_rtx, const1_rtx));
22209 t8 = gen_reg_rtx (V4DImode);
22210 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22211 const2_rtx, GEN_INT (3),
22212 const0_rtx, const1_rtx));
22213 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22214 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
/* --- 128-bit (SSSE3/XOP) fallback from here on.  --- */
22220 gcc_assert (GET_MODE_SIZE (mode) <= 16);
22227 /* The XOP VPPERM insn supports three inputs. By ignoring the
22228 one_operand_shuffle special case, we avoid creating another
22229 set of constant vectors in memory. */
22230 one_operand_shuffle = false;
22232 /* mask = mask & {2*w-1, ...} */
22233 vt = GEN_INT (2*w - 1);
22237 /* mask = mask & {w-1, ...} */
22238 vt = GEN_INT (w - 1);
22241 for (i = 0; i < w; i++)
22243 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22244 mask = expand_simple_binop (maskmode, AND, mask, vt,
22245 NULL_RTX, 0, OPTAB_DIRECT);
22247 /* For non-QImode operations, convert the word permutation control
22248 into a byte permutation control. */
22249 if (mode != V16QImode)
22251 mask = expand_simple_binop (maskmode, ASHIFT, mask,
22252 GEN_INT (exact_log2 (e)),
22253 NULL_RTX, 0, OPTAB_DIRECT);
22255 /* Convert mask to vector of chars. */
22256 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22258 /* Replicate each of the input bytes into byte positions:
22259 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22260 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22261 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
22262 for (i = 0; i < 16; ++i)
22263 vec[i] = GEN_INT (i/e * e);
22264 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22265 vt = validize_mem (force_const_mem (V16QImode, vt));
22267 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22269 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22271 /* Convert it into the byte positions by doing
22272 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
22273 for (i = 0; i < 16; ++i)
22274 vec[i] = GEN_INT (i % e);
22275 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22276 vt = validize_mem (force_const_mem (V16QImode, vt));
22277 emit_insn (gen_addv16qi3 (mask, mask, vt));
22280 /* The actual shuffle operations all operate on V16QImode. */
22281 op0 = gen_lowpart (V16QImode, op0);
22282 op1 = gen_lowpart (V16QImode, op1);
22286 if (GET_MODE (target) != V16QImode)
22287 target = gen_reg_rtx (V16QImode);
22288 emit_insn (gen_xop_pperm (target, op0, op1, mask));
22289 if (target != operands[0])
22290 emit_move_insn (operands[0],
22291 gen_lowpart (GET_MODE (operands[0]), target));
22293 else if (one_operand_shuffle)
22295 if (GET_MODE (target) != V16QImode)
22296 target = gen_reg_rtx (V16QImode)
22297 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22298 if (target != operands[0])
22299 emit_move_insn (operands[0],
22300 gen_lowpart (GET_MODE (operands[0]), target));
22307 /* Shuffle the two input vectors independently. */
22308 t1 = gen_reg_rtx (V16QImode);
22309 t2 = gen_reg_rtx (V16QImode);
22310 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22311 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22314 /* Then merge them together. The key is whether any given control
22315 element contained a bit set that indicates the second word. */
22316 mask = operands[3];
22318 if (maskmode == V2DImode && !TARGET_SSE4_1)
22320 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
22321 more shuffle to convert the V2DI input mask into a V4SI
22322 input mask. At which point the masking that expand_int_vcond
22323 will work as desired. */
22324 rtx t3 = gen_reg_rtx (V4SImode);
22325 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22326 const0_rtx, const0_rtx,
22327 const2_rtx, const2_rtx));
22329 maskmode = V4SImode;
/* Select between the two pshufb results via a vector compare-and-blend
   (ix86_expand_int_vcond) on the "which source" bit of the selector.  */
22333 for (i = 0; i < w; i++)
22335 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22336 vt = force_reg (maskmode, vt);
22337 mask = expand_simple_binop (maskmode, AND, mask, vt,
22338 NULL_RTX, 0, OPTAB_DIRECT);
22340 if (GET_MODE (target) != mode)
22341 target = gen_reg_rtx (mode);
22343 xops[1] = gen_lowpart (mode, t2);
22344 xops[2] = gen_lowpart (mode, t1);
22345 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22348 ok = ix86_expand_int_vcond (xops);
22350 if (target != operands[0])
22351 emit_move_insn (operands[0],
22352 gen_lowpart (GET_MODE (operands[0]), target));
22356 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
22357 true if we should do zero extension, else sign extension. HIGH_P is
22358 true if we want the N/2 high elements, else the low elements. */
/* NOTE(review): excerpt is non-contiguous -- the switch skeleton, the
   TARGET_SSE4_1 branch head, and some declarations (tmp, dest handling)
   are in elided lines.  Two strategies are visible: direct
   sign/zero-extend insns (SSE4.1/AVX2/AVX-512), and the pre-SSE4.1
   interleave-with-sign-mask fallback at the bottom.  */
22361 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22363 machine_mode imode = GET_MODE (src);
/* Extend path: UNPACK extends a half-width vector; EXTRACT (for >=32-byte
   vectors) first pulls out the requested half.  */
22368 rtx (*unpack)(rtx, rtx);
22369 rtx (*extract)(rtx, rtx) = NULL;
22370 machine_mode halfmode = BLKmode;
22376 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22378 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22379 halfmode = V32QImode;
22381 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22385 unpack = gen_avx2_zero_extendv16qiv16hi2;
22387 unpack = gen_avx2_sign_extendv16qiv16hi2;
22388 halfmode = V16QImode;
22390 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22394 unpack = gen_avx512f_zero_extendv16hiv16si2;
22396 unpack = gen_avx512f_sign_extendv16hiv16si2;
22397 halfmode = V16HImode;
22399 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22403 unpack = gen_avx2_zero_extendv8hiv8si2;
22405 unpack = gen_avx2_sign_extendv8hiv8si2;
22406 halfmode = V8HImode;
22408 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22412 unpack = gen_avx512f_zero_extendv8siv8di2;
22414 unpack = gen_avx512f_sign_extendv8siv8di2;
22415 halfmode = V8SImode;
22417 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22421 unpack = gen_avx2_zero_extendv4siv4di2;
22423 unpack = gen_avx2_sign_extendv4siv4di2;
22424 halfmode = V4SImode;
22426 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22430 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22432 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22436 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22438 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22442 unpack = gen_sse4_1_zero_extendv2siv2di2;
22444 unpack = gen_sse4_1_sign_extendv2siv2di2;
22447 gcc_unreachable ();
22450 if (GET_MODE_SIZE (imode) >= 32)
22452 tmp = gen_reg_rtx (halfmode);
22453 emit_insn (extract (tmp, src));
/* 16-byte vectors with HIGH_P: move the high half down first, since the
   SSE4.1 pmov*x insns read the low half.  */
22457 /* Shift higher 8 bytes to lower 8 bytes. */
22458 tmp = gen_reg_rtx (V1TImode);
22459 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22461 tmp = gen_lowpart (imode, tmp);
22466 emit_insn (unpack (dest, tmp));
/* Pre-SSE4.1 fallback: interleave SRC with either zeros (unsigned) or a
   computed sign mask (signed) using punpckl/punpckh.  */
22470 rtx (*unpack)(rtx, rtx, rtx);
22476 unpack = gen_vec_interleave_highv16qi;
22478 unpack = gen_vec_interleave_lowv16qi;
22482 unpack = gen_vec_interleave_highv8hi;
22484 unpack = gen_vec_interleave_lowv8hi;
22488 unpack = gen_vec_interleave_highv4si;
22490 unpack = gen_vec_interleave_lowv4si;
22493 gcc_unreachable ();
22497 tmp = force_reg (imode, CONST0_RTX (imode));
/* Signed case: 0 > src yields all-ones where SRC is negative, i.e. the
   sign-extension bytes.  */
22499 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22500 src, pc_rtx, pc_rtx);
22502 rtx tmp2 = gen_reg_rtx (imode);
22503 emit_insn (unpack (tmp2, src, tmp));
22504 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22508 /* Expand conditional increment or decrement using adc/sbb instructions.
22509 The default case using setcc followed by the conditional move can be
22510 done by generic code. */
/* operands[1] is the comparison, operands[3] the +/-1 increment; only
   +1/-1 are handled here.  NOTE(review): excerpt is non-contiguous --
   the early `return false` paths, the switch heads over MODE, and the
   final `return true` are in elided lines.  */
22512 ix86_expand_int_addcc (rtx operands[])
22514 enum rtx_code code = GET_CODE (operands[1]);
22516 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22518 rtx val = const0_rtx;
22519 bool fpcmp = false;
22521 rtx op0 = XEXP (operands[1], 0);
22522 rtx op1 = XEXP (operands[1], 1);
/* Only +/-1 can be folded into the carry of an adc/sbb.  */
22524 if (operands[3] != const1_rtx
22525 && operands[3] != constm1_rtx)
/* Rewrite the comparison so its result lands in the carry flag.  */
22527 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22529 code = GET_CODE (compare_op);
22531 flags = XEXP (compare_op, 0);
22533 if (GET_MODE (flags) == CCFPmode
22534 || GET_MODE (flags) == CCFPUmode)
22537 code = ix86_fp_compare_code_to_integer (code);
/* FP comparisons must reverse with unordered-awareness.  */
22544 PUT_CODE (compare_op,
22545 reverse_condition_maybe_unordered
22546 (GET_CODE (compare_op)));
22548 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22551 mode = GET_MODE (operands[0]);
22553 /* Construct either adc or sbb insn. */
22554 if ((code == LTU) == (operands[3] == constm1_rtx))
22559 insn = gen_subqi3_carry;
22562 insn = gen_subhi3_carry;
22565 insn = gen_subsi3_carry;
22568 insn = gen_subdi3_carry;
22571 gcc_unreachable ();
22579 insn = gen_addqi3_carry;
22582 insn = gen_addhi3_carry;
22585 insn = gen_addsi3_carry;
22588 insn = gen_adddi3_carry;
22591 gcc_unreachable ();
22594 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22600 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22601 but works for floating point parameters and nonoffsetable memories.
22602 For pushes, it returns just stack offsets; the values will be saved
22603 in the right order. At most four parts are generated. */
/* Returns the number of SIZE-determined parts; NOTE(review): excerpt is
   non-contiguous -- the TARGET_64BIT branch heads, the `return size`
   statement and several else arms are in elided lines.  */
22606 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
/* 32-bit target: 4-byte parts (XFmode is 12 bytes -> 3 parts).  */
22611 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
/* 64-bit target: 8-byte parts, rounding XFmode's 10 bytes up.  */
22613 size = (GET_MODE_SIZE (mode) + 4) / 8;
22615 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22616 gcc_assert (size >= 2 && size <= 4);
22618 /* Optimize constant pool reference to immediates. This is used by fp
22619 moves, that force all constants to memory to allow combining. */
22620 if (MEM_P (operand) && MEM_READONLY_P (operand))
22622 rtx tmp = maybe_get_pool_constant (operand);
22627 if (MEM_P (operand) && !offsettable_memref_p (operand))
22629 /* The only non-offsetable memories we handle are pushes. */
22630 int ok = push_operand (operand, VOIDmode);
22634 operand = copy_rtx (operand);
22635 PUT_MODE (operand, word_mode);
22636 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22640 if (GET_CODE (operand) == CONST_VECTOR)
22642 machine_mode imode = int_mode_for_mode (mode);
22643 /* Caution: if we looked through a constant pool memory above,
22644 the operand may actually have a different mode now. That's
22645 ok, since we want to pun this all the way back to an integer. */
22646 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22647 gcc_assert (operand != NULL);
/* --- 32-bit target splitting (SImode parts). --- */
22653 if (mode == DImode)
22654 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22659 if (REG_P (operand))
22661 gcc_assert (reload_completed);
22662 for (i = 0; i < size; i++)
22663 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22665 else if (offsettable_memref_p (operand))
22667 operand = adjust_address (operand, SImode, 0);
22668 parts[0] = operand;
22669 for (i = 1; i < size; i++)
22670 parts[i] = adjust_address (operand, SImode, 4 * i);
22672 else if (GET_CODE (operand) == CONST_DOUBLE)
22677 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
/* TFmode: all four 32-bit words of the constant.  */
22681 real_to_target (l, &r, mode);
22682 parts[3] = gen_int_mode (l[3], SImode);
22683 parts[2] = gen_int_mode (l[2], SImode);
22686 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22687 long double may not be 80-bit. */
22688 real_to_target (l, &r, mode);
22689 parts[2] = gen_int_mode (l[2], SImode);
22692 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22695 gcc_unreachable ();
22697 parts[1] = gen_int_mode (l[1], SImode);
22698 parts[0] = gen_int_mode (l[0], SImode);
22701 gcc_unreachable ();
/* --- 64-bit target splitting (DImode parts). --- */
22706 if (mode == TImode)
22707 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22708 if (mode == XFmode || mode == TFmode)
/* XFmode upper part is only 2-4 bytes; keep it SImode.  */
22710 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22711 if (REG_P (operand))
22713 gcc_assert (reload_completed);
22714 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22715 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22717 else if (offsettable_memref_p (operand))
22719 operand = adjust_address (operand, DImode, 0);
22720 parts[0] = operand;
22721 parts[1] = adjust_address (operand, upper_mode, 8);
22723 else if (GET_CODE (operand) == CONST_DOUBLE)
22728 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22729 real_to_target (l, &r, mode);
22731 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22732 if (HOST_BITS_PER_WIDE_INT >= 64)
22735 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22736 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22739 parts[0] = immed_double_const (l[0], l[1], DImode);
22741 if (upper_mode == SImode)
22742 parts[1] = gen_int_mode (l[2], SImode);
22743 else if (HOST_BITS_PER_WIDE_INT >= 64)
22746 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22747 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22750 parts[1] = immed_double_const (l[2], l[3], DImode);
22753 gcc_unreachable ();
22760 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22761 Return false when normal moves are needed; true when all required
22762 insns have been emitted. Operands 2-4 contain the input values
22763 in the correct order; operands 5-7 contain the output values. */
/* NOTE(review): excerpt is non-contiguous -- declarations (part, nparts,
   push, i, j), several branch heads and return statements fall in elided
   lines.  */
22766 ix86_split_long_move (rtx operands[])
22771 int collisions = 0;
22772 machine_mode mode = GET_MODE (operands[0]);
22773 bool collisionparts[4];
22775 /* The DFmode expanders may ask us to move double.
22776 For 64bit target this is single move. By hiding the fact
22777 here we simplify i386.md splitters. */
22778 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22780 /* Optimize constant pool reference to immediates. This is used by
22781 fp moves, that force all constants to memory to allow combining. */
22783 if (MEM_P (operands[1])
22784 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22785 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22786 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22787 if (push_operand (operands[0], VOIDmode))
22789 operands[0] = copy_rtx (operands[0]);
22790 PUT_MODE (operands[0], word_mode);
22793 operands[0] = gen_lowpart (DImode, operands[0]);
22794 operands[1] = gen_lowpart (DImode, operands[1]);
22795 emit_move_insn (operands[0], operands[1]);
22799 /* The only non-offsettable memory we handle is push. */
22800 if (push_operand (operands[0], VOIDmode))
22803 gcc_assert (!MEM_P (operands[0])
22804 || offsettable_memref_p (operands[0]));
/* Split both operands into word-sized parts via ix86_split_to_parts.  */
22806 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22807 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22809 /* When emitting push, take care for source operands on the stack. */
22810 if (push && MEM_P (operands[1])
22811 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22813 rtx src_base = XEXP (part[1][nparts - 1], 0);
22815 /* Compensate for the stack decrement by 4. */
22816 if (!TARGET_64BIT && nparts == 3
22817 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22818 src_base = plus_constant (Pmode, src_base, 4);
22820 /* src_base refers to the stack pointer and is
22821 automatically decreased by emitted push. */
22822 for (i = 0; i < nparts; i++)
22823 part[1][i] = change_address (part[1][i],
22824 GET_MODE (part[1][i]), src_base);
22827 /* We need to do copy in the right order in case an address register
22828 of the source overlaps the destination. */
22829 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22833 for (i = 0; i < nparts; i++)
22836 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22837 if (collisionparts[i])
22841 /* Collision in the middle part can be handled by reordering. */
22842 if (collisions == 1 && nparts == 3 && collisionparts [1])
22844 std::swap (part[0][1], part[0][2]);
22845 std::swap (part[1][1], part[1][2]);
22847 else if (collisions == 1
22849 && (collisionparts [1] || collisionparts [2]))
22851 if (collisionparts [1])
22853 std::swap (part[0][1], part[0][2]);
22854 std::swap (part[1][1], part[1][2]);
22858 std::swap (part[0][2], part[0][3]);
22859 std::swap (part[1][2], part[1][3]);
22863 /* If there are more collisions, we can't handle it by reordering.
22864 Do an lea to the last part and use only one colliding move. */
22865 else if (collisions > 1)
22871 base = part[0][nparts - 1];
22873 /* Handle the case when the last part isn't valid for lea.
22874 Happens in 64-bit mode storing the 12-byte XFmode. */
22875 if (GET_MODE (base) != Pmode)
22876 base = gen_rtx_REG (Pmode, REGNO (base));
22878 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22879 part[1][0] = replace_equiv_address (part[1][0], base);
22880 for (i = 1; i < nparts; i++)
22882 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22883 part[1][i] = replace_equiv_address (part[1][i], tmp);
/* Push path: emit the parts from highest to lowest address.  */
22894 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22895 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22896 stack_pointer_rtx, GEN_INT (-4)));
22897 emit_move_insn (part[0][2], part[1][2]);
22899 else if (nparts == 4)
22901 emit_move_insn (part[0][3], part[1][3]);
22902 emit_move_insn (part[0][2], part[1][2]);
22907 /* In 64bit mode we don't have 32bit push available. In case this is
22908 register, it is OK - we will just use larger counterpart. We also
22909 retype memory - these comes from attempt to avoid REX prefix on
22910 moving of second half of TFmode value. */
22911 if (GET_MODE (part[1][1]) == SImode)
22913 switch (GET_CODE (part[1][1]))
22916 part[1][1] = adjust_address (part[1][1], DImode, 0);
22920 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22924 gcc_unreachable ();
22927 if (GET_MODE (part[1][0]) == SImode)
22928 part[1][0] = part[1][1];
22931 emit_move_insn (part[0][1], part[1][1]);
22932 emit_move_insn (part[0][0], part[1][0]);
22936 /* Choose correct order to not overwrite the source before it is copied. */
22937 if ((REG_P (part[0][0])
22938 && REG_P (part[1][1])
22939 && (REGNO (part[0][0]) == REGNO (part[1][1])
22941 && REGNO (part[0][0]) == REGNO (part[1][2]))
22943 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22945 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Collision: copy in reverse part order.  */
22947 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22949 operands[2 + i] = part[0][j];
22950 operands[6 + i] = part[1][j];
22955 for (i = 0; i < nparts; i++)
22957 operands[2 + i] = part[0][i];
22958 operands[6 + i] = part[1][i];
22962 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22963 if (optimize_insn_for_size_p ())
22965 for (j = 0; j < nparts - 1; j++)
22966 if (CONST_INT_P (operands[6 + j])
22967 && operands[6 + j] != const0_rtx
22968 && REG_P (operands[2 + j]))
22969 for (i = j; i < nparts - 1; i++)
22970 if (CONST_INT_P (operands[7 + i])
22971 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22972 operands[7 + i] = operands[2 + j];
22975 for (i = 0; i < nparts; i++)
22976 emit_move_insn (operands[2 + i], operands[6 + i]);
22981 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22982 left shift by a constant, either using a single shift or
22983 a sequence of add instructions. */
/* MODE is the *doubled* mode being split (DImode split into SImode
   halves, TImode into DImode halves), so a DImode split uses the SImode
   add/shift patterns -- the DImode/SImode pairing below is intentional,
   matching the gen_x86_shld selection in ix86_split_ashl.
   NOTE(review): the `if (count == 1` head of the condition is in an
   elided line of this excerpt.  */
22986 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
22988 rtx (*insn)(rtx, rtx, rtx);
/* Prefer COUNT self-adds when they cost no more than one shift-by-const
   and we are not optimizing for size.  */
22991 || (count * ix86_cost->add <= ix86_cost->shift_const
22992 && !optimize_insn_for_size_p ()))
22994 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22995 while (count-- > 0)
22996 emit_insn (insn (operand, operand, operand));
23000 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23001 emit_insn (insn (operand, operand, GEN_INT (count)));
23006 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
23008 rtx (*gen_ashl3)(rtx, rtx, rtx);
23009 rtx (*gen_shld)(rtx, rtx, rtx);
23010 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23012 rtx low[2], high[2];
23015 if (CONST_INT_P (operands[2]))
23017 split_double_mode (mode, operands, 2, low, high);
23018 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23020 if (count >= half_width)
23022 emit_move_insn (high[0], low[1]);
23023 emit_move_insn (low[0], const0_rtx);
23025 if (count > half_width)
23026 ix86_expand_ashl_const (high[0], count - half_width, mode);
23030 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23032 if (!rtx_equal_p (operands[0], operands[1]))
23033 emit_move_insn (operands[0], operands[1]);
23035 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
23036 ix86_expand_ashl_const (low[0], count, mode);
23041 split_double_mode (mode, operands, 1, low, high);
23043 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23045 if (operands[1] == const1_rtx)
23047 /* Assuming we've chosen a QImode capable registers, then 1 << N
23048 can be done with two 32/64-bit shifts, no branches, no cmoves. */
23049 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
23051 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
23053 ix86_expand_clear (low[0]);
23054 ix86_expand_clear (high[0]);
23055 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
23057 d = gen_lowpart (QImode, low[0]);
23058 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23059 s = gen_rtx_EQ (QImode, flags, const0_rtx);
23060 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23062 d = gen_lowpart (QImode, high[0]);
23063 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23064 s = gen_rtx_NE (QImode, flags, const0_rtx);
23065 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23068 /* Otherwise, we can get the same results by manually performing
23069 a bit extract operation on bit 5/6, and then performing the two
23070 shifts. The two methods of getting 0/1 into low/high are exactly
23071 the same size. Avoiding the shift in the bit extract case helps
23072 pentium4 a bit; no one else seems to care much either way. */
23075 machine_mode half_mode;
23076 rtx (*gen_lshr3)(rtx, rtx, rtx);
23077 rtx (*gen_and3)(rtx, rtx, rtx);
23078 rtx (*gen_xor3)(rtx, rtx, rtx);
23079 HOST_WIDE_INT bits;
23082 if (mode == DImode)
23084 half_mode = SImode;
23085 gen_lshr3 = gen_lshrsi3;
23086 gen_and3 = gen_andsi3;
23087 gen_xor3 = gen_xorsi3;
23092 half_mode = DImode;
23093 gen_lshr3 = gen_lshrdi3;
23094 gen_and3 = gen_anddi3;
23095 gen_xor3 = gen_xordi3;
23099 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
23100 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
23102 x = gen_lowpart (half_mode, operands[2]);
23103 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
23105 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
23106 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
23107 emit_move_insn (low[0], high[0]);
23108 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
23111 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23112 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
23116 if (operands[1] == constm1_rtx)
23118 /* For -1 << N, we can avoid the shld instruction, because we
23119 know that we're shifting 0...31/63 ones into a -1. */
23120 emit_move_insn (low[0], constm1_rtx);
23121 if (optimize_insn_for_size_p ())
23122 emit_move_insn (high[0], low[0]);
23124 emit_move_insn (high[0], constm1_rtx);
23128 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23130 if (!rtx_equal_p (operands[0], operands[1]))
23131 emit_move_insn (operands[0], operands[1]);
23133 split_double_mode (mode, operands, 1, low, high);
23134 emit_insn (gen_shld (high[0], low[0], operands[2]));
23137 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23139 if (TARGET_CMOVE && scratch)
23141 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23142 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23144 ix86_expand_clear (scratch);
23145 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23149 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23150 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23152 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
23157 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23159 rtx (*gen_ashr3)(rtx, rtx, rtx)
23160 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23161 rtx (*gen_shrd)(rtx, rtx, rtx);
23162 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23164 rtx low[2], high[2];
23167 if (CONST_INT_P (operands[2]))
23169 split_double_mode (mode, operands, 2, low, high);
23170 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23172 if (count == GET_MODE_BITSIZE (mode) - 1)
23174 emit_move_insn (high[0], high[1]);
23175 emit_insn (gen_ashr3 (high[0], high[0],
23176 GEN_INT (half_width - 1)));
23177 emit_move_insn (low[0], high[0]);
23180 else if (count >= half_width)
23182 emit_move_insn (low[0], high[1]);
23183 emit_move_insn (high[0], low[0]);
23184 emit_insn (gen_ashr3 (high[0], high[0],
23185 GEN_INT (half_width - 1)));
23187 if (count > half_width)
23188 emit_insn (gen_ashr3 (low[0], low[0],
23189 GEN_INT (count - half_width)));
23193 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23195 if (!rtx_equal_p (operands[0], operands[1]))
23196 emit_move_insn (operands[0], operands[1]);
23198 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23199 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
23204 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23206 if (!rtx_equal_p (operands[0], operands[1]))
23207 emit_move_insn (operands[0], operands[1]);
23209 split_double_mode (mode, operands, 1, low, high);
23211 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23212 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
23214 if (TARGET_CMOVE && scratch)
23216 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23217 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23219 emit_move_insn (scratch, high[0]);
23220 emit_insn (gen_ashr3 (scratch, scratch,
23221 GEN_INT (half_width - 1)));
23222 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23227 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23228 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23230 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a double-word logical right shift (MODE is the full
   double-word mode) into half-width operations; same structure as
   ix86_split_ashr but zero-filling instead of sign-filling.  */
23236 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23238 rtx (*gen_lshr3)(rtx, rtx, rtx)
23239 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23240 rtx (*gen_shrd)(rtx, rtx, rtx);
23241 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23243 rtx low[2], high[2];
23246 if (CONST_INT_P (operands[2]))
23248 split_double_mode (mode, operands, 2, low, high);
23249 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
/* Count >= half_width: low result is the shifted high source word,
   high result is zero.  */
23251 if (count >= half_width)
23253 emit_move_insn (low[0], high[1]);
23254 ix86_expand_clear (high[0]);
23256 if (count > half_width)
23257 emit_insn (gen_lshr3 (low[0], low[0],
23258 GEN_INT (count - half_width)));
/* Small constant count: shrd feeds high-word bits into the low
   word; the high word is then logically shifted in place.  */
23262 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23264 if (!rtx_equal_p (operands[0], operands[1]))
23265 emit_move_insn (operands[0], operands[1]);
23267 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23268 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd/shr pair plus a fixup for counts that reach
   into the upper half.  */
23273 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23275 if (!rtx_equal_p (operands[0], operands[1]))
23276 emit_move_insn (operands[0], operands[1]);
23278 split_double_mode (mode, operands, 1, low, high);
23280 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23281 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23283 if (TARGET_CMOVE && scratch)
/* Branch-free fixup: SCRATCH holds zero (the fill value for a
   logical shift) for the conditional swap.  */
23285 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23286 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23288 ix86_expand_clear (scratch);
23289 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
/* Branching fixup when cmov/scratch is unavailable.  */
23294 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23295 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23297 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
23302 /* Predict just emitted jump instruction to be taken with probability PROB. */
/* PROB is on the REG_BR_PROB_BASE scale; the note is consumed later
   by the branch-probability machinery.  The caller must have just
   emitted a jump insn.  */
23304 predict_jump (int prob)
23306 rtx insn = get_last_insn ();
23307 gcc_assert (JUMP_P (insn));
23308 add_int_reg_note (insn, REG_BR_PROB, prob);
23311 /* Helper function for the string operations below. Dest VARIABLE whether
23312 it is aligned to VALUE bytes. If true, jump to the label. */
/* I.e. emit a test of VARIABLE & VALUE and return a fresh label;
   the EQ comparison below jumps when the tested bits are zero.
   EPILOGUE selects the predicted probability of the jump
   (50% in epilogues, 90% elsewhere).  */
23313 static rtx_code_label *
23314 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23316 rtx_code_label *label = gen_label_rtx ();
23317 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* Mask the bits of interest with a mode-appropriate AND.  */
23318 if (GET_MODE (variable) == DImode)
23319 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23321 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23322 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23325 predict_jump (REG_BR_PROB_BASE * 50 / 100);
23327 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23331 /* Adjust COUNTER by the VALUE. */
/* Emits COUNTREG -= VALUE, implemented as an add of the negated
   constant in COUNTREG's own mode.  */
23333 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23335 rtx (*gen_add)(rtx, rtx, rtx)
23336 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23338 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23341 /* Zero extend possibly SImode EXP to Pmode register. */
/* The trailing 1 requests an unsigned (zero) extension.  */
23343 ix86_zero_extend_to_Pmode (rtx exp)
23345 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23348 /* Divide COUNTREG by SCALE. */
/* Constant counts are divided at compile time; a register count is
   divided by a logical right shift, so SCALE must be a power of
   two (exact_log2 below).  */
23350 scale_counter (rtx countreg, int scale)
23356 if (CONST_INT_P (countreg))
23357 return GEN_INT (INTVAL (countreg) / scale);
23358 gcc_assert (REG_P (countreg));
23360 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23361 GEN_INT (exact_log2 (scale)),
23362 NULL, 1, OPTAB_DIRECT);
23366 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
23367 DImode for constant loop counts. */
23369 static machine_mode
23370 counter_mode (rtx count_exp)
/* Non-constant counts already carry their own mode.  */
23372 if (GET_MODE (count_exp) != VOIDmode)
23373 return GET_MODE (count_exp);
23374 if (!CONST_INT_P (count_exp))
/* A constant that does not fit in 32 bits forces DImode on 64-bit
   targets.  */
23376 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23381 /* Copy the address to a Pmode register. This is used for x32 to
23382 truncate DImode TLS address to a SImode register. */
23385 ix86_copy_addr_to_reg (rtx addr)
/* Common case: ADDR is already Pmode (or a mode-less constant).  */
23387 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23388 return copy_addr_to_reg (addr);
/* Only the x32 DImode -> SImode truncation case remains; take the
   low part of a fresh DImode register holding ADDR.  */
23391 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23392 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
23396 /* When ISSETMEM is FALSE, output simple loop to move memory pointer to SRCPTR
23397 to DESTPTR via chunks of MODE unrolled UNROLL times, overall size is COUNT
23398 specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set
23399 memory by VALUE (supposed to be in MODE).
23401 The size is rounded down to whole number of chunk size moved at once.
23402 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
/* EXPECTED_SIZE (-1 if unknown) is only used to attach branch
   probabilities to the loop back-edge.  On exit DESTPTR (and SRCPTR
   for the copy case) are advanced past the processed bytes.  */
23406 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23407 rtx destptr, rtx srcptr, rtx value,
23408 rtx count, machine_mode mode, int unroll,
23409 int expected_size, bool issetmem)
23411 rtx_code_label *out_label, *top_label;
23413 machine_mode iter_mode = counter_mode (count);
23414 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23415 rtx piece_size = GEN_INT (piece_size_n);
23416 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23420 top_label = gen_label_rtx ();
23421 out_label = gen_label_rtx ();
23422 iter = gen_reg_rtx (iter_mode);
/* SIZE = COUNT rounded down to a multiple of the chunk size; ITER
   counts bytes from 0 up to SIZE.  */
23424 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23425 NULL, 1, OPTAB_DIRECT);
23426 /* Those two should combine. */
23427 if (piece_size == const1_rtx)
/* Zero-sized body: skip the loop entirely (predicted unlikely).  */
23429 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23431 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23433 emit_move_insn (iter, const0_rtx);
23435 emit_label (top_label);
23437 tmp = convert_modes (Pmode, iter_mode, iter, true);
23439 /* This assert could be relaxed - in this case we'll need to compute
23440 smallest power of two, containing in PIECE_SIZE_N and pass it to
23442 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
/* Address the current chunk as BASE + ITER with alias info from
   DESTMEM/SRCMEM preserved.  */
23443 destmem = offset_address (destmem, tmp, piece_size_n);
23444 destmem = adjust_address (destmem, mode, 0);
23448 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23449 srcmem = adjust_address (srcmem, mode, 0);
23451 /* When unrolling for chips that reorder memory reads and writes,
23452 we can save registers by using single temporary.
23453 Also using 4 temporaries is overkill in 32bit mode. */
/* NOTE(review): the "&& 0" disables this single-temporary variant;
   the code is kept but unreachable.  */
23454 if (!TARGET_64BIT && 0)
23456 for (i = 0; i < unroll; i++)
23461 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23463 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23465 emit_move_insn (destmem, srcmem);
/* Active copy variant: load all UNROLL chunks into temporaries
   first, then store them — keeps loads ahead of stores.  */
23471 gcc_assert (unroll <= 4);
23472 for (i = 0; i < unroll; i++)
23474 tmpreg[i] = gen_reg_rtx (mode);
23478 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23480 emit_move_insn (tmpreg[i], srcmem);
23482 for (i = 0; i < unroll; i++)
23487 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23489 emit_move_insn (destmem, tmpreg[i]);
/* Setmem variant: store VALUE into each unrolled chunk.  */
23494 for (i = 0; i < unroll; i++)
23498 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23499 emit_move_insn (destmem, value);
/* Advance ITER and loop while ITER < SIZE.  */
23502 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23503 true, OPTAB_LIB_WIDEN);
23505 emit_move_insn (iter, tmp);
23507 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Attach a back-edge probability derived from the expected trip
   count when the caller supplied one.  */
23509 if (expected_size != -1)
23511 expected_size /= GET_MODE_SIZE (mode) * unroll;
23512 if (expected_size == 0)
23514 else if (expected_size > REG_BR_PROB_BASE)
23515 predict_jump (REG_BR_PROB_BASE - 1);
23517 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23520 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Advance the caller's pointers past the bytes processed by the
   loop.  */
23521 iter = ix86_zero_extend_to_Pmode (iter);
23522 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23523 true, OPTAB_LIB_WIDEN);
23524 if (tmp != destptr)
23525 emit_move_insn (destptr, tmp);
23528 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23529 true, OPTAB_LIB_WIDEN);
23531 emit_move_insn (srcptr, tmp);
23533 emit_label (out_label);
23536 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23537 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23538 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23539 For setmem case, VALUE is a promoted to a wider size ORIG_VALUE.
23540 ORIG_VALUE is the original value passed to memset to fill the memory with.
23541 Other arguments have same meaning as for previous function. */
23544 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23545 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23547 machine_mode mode, bool issetmem)
23552 HOST_WIDE_INT rounded_count;
23554 /* If possible, it is shorter to use rep movs.
23555 TODO: Maybe it is better to move this logic to decide_alg. */
/* Widen QImode to a larger chunk when the count is a multiple of 4
   and (for memset) the fill byte is zero, so promotion is trivial.  */
23556 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23557 && (!issetmem || orig_value == const0_rtx))
/* The rep patterns want a BLKmode MEM based exactly on DESTPTR.  */
23560 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23561 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
/* COUNTREG counts chunks, not bytes.  */
23563 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23564 GET_MODE_SIZE (mode)));
/* DESTEXP describes the final value of the destination pointer,
   DESTPTR + COUNTREG * chunk-size, for the rep pattern.  */
23565 if (mode != QImode)
23567 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23568 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23569 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23572 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
/* Keep the MEM's known size in sync with the rounded byte count so
   alias analysis stays correct.  */
23573 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23575 rounded_count = (INTVAL (count)
23576 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23577 destmem = shallow_copy_rtx (destmem);
23578 set_mem_size (destmem, rounded_count);
23580 else if (MEM_SIZE_KNOWN_P (destmem))
23581 clear_mem_size (destmem);
/* Setmem: emit rep stos with the (possibly promoted) fill value.  */
23585 value = force_reg (mode, gen_lowpart (mode, value));
23586 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Movmem: mirror the address/size bookkeeping for the source, then
   emit rep mov.  */
23590 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23591 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23592 if (mode != QImode)
23594 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23595 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23596 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23599 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23600 if (CONST_INT_P (count))
23602 rounded_count = (INTVAL (count)
23603 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23604 srcmem = shallow_copy_rtx (srcmem);
23605 set_mem_size (srcmem, rounded_count);
23609 if (MEM_SIZE_KNOWN_P (srcmem))
23610 clear_mem_size (srcmem);
23612 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23617 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23619 SRC is passed by pointer to be updated on return.
23620 Return value is updated DST. */
/* DESTPTR/SRCPTR are the pointer registers and are advanced after
   each emitted chunk.  */
23622 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23623 HOST_WIDE_INT size_to_move)
23625 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23626 enum insn_code code;
23627 machine_mode move_mode;
23630 /* Find the widest mode in which we could perform moves.
23631 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
23632 it until move of such size is supported. */
23633 piece_size = 1 << floor_log2 (size_to_move);
23634 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23635 code = optab_handler (mov_optab, move_mode);
23636 while (code == CODE_FOR_nothing && piece_size > 1)
23639 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23640 code = optab_handler (mov_optab, move_mode);
23643 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23644 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23645 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23647 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23648 move_mode = mode_for_vector (word_mode, nunits);
23649 code = optab_handler (mov_optab, move_mode);
/* No vector move available: fall back to word-sized moves.  */
23650 if (code == CODE_FOR_nothing)
23652 move_mode = word_mode;
23653 piece_size = GET_MODE_SIZE (move_mode);
23654 code = optab_handler (mov_optab, move_mode);
23657 gcc_assert (code != CODE_FOR_nothing);
23659 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23660 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23662 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23663 gcc_assert (size_to_move % piece_size == 0);
23664 adjust = GEN_INT (piece_size);
23665 for (i = 0; i < size_to_move; i += piece_size)
23667 /* We move from memory to memory, so we'll need to do it via
23668 a temporary register. */
23669 tempreg = gen_reg_rtx (move_mode);
23670 emit_insn (GEN_FCN (code) (tempreg, src));
23671 emit_insn (GEN_FCN (code) (dst, tempreg));
/* Bump both pointer registers and re-derive the MEMs so alias and
   offset info track the new addresses.  */
23673 emit_move_insn (destptr,
23674 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23675 emit_move_insn (srcptr,
23676 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23678 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23680 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23684 /* Update DST and SRC rtx. */
23689 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23691 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23692 rtx destptr, rtx srcptr, rtx count, int max_size)
/* Constant count: the remainder is known, emit exactly one move per
   set bit of the remainder (largest chunk first).  */
23695 if (CONST_INT_P (count))
23697 HOST_WIDE_INT countval = INTVAL (count);
23698 HOST_WIDE_INT epilogue_size = countval % max_size;
23701 /* For now MAX_SIZE should be a power of 2. This assert could be
23702 relaxed, but it'll require a bit more complicated epilogue
23704 gcc_assert ((max_size & (max_size - 1)) == 0);
23705 for (i = max_size; i >= 1; i >>= 1)
23707 if (epilogue_size & i)
23708 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
/* Large variable remainder: fall back to a byte-copy loop.  */
23714 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23715 count, 1, OPTAB_DIRECT);
23716 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23717 count, QImode, 1, 4, false);
23721 /* When there are stringops, we can cheaply increase dest and src pointers.
23722 Otherwise we save code size by maintaining offset (zero is readily
23723 available from preceding rep operation) and using x86 addressing modes.
/* Variable remainder <= 4 bytes handled bit-by-bit: test bit 2, 1
   and 0 of COUNT, each guarded by an aligntest label.  */
23725 if (TARGET_SINGLE_STRINGOP)
23729 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23730 src = change_address (srcmem, SImode, srcptr);
23731 dest = change_address (destmem, SImode, destptr);
23732 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23733 emit_label (label);
23734 LABEL_NUSES (label) = 1;
23738 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23739 src = change_address (srcmem, HImode, srcptr);
23740 dest = change_address (destmem, HImode, destptr);
23741 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23742 emit_label (label);
23743 LABEL_NUSES (label) = 1;
23747 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23748 src = change_address (srcmem, QImode, srcptr);
23749 dest = change_address (destmem, QImode, destptr);
23750 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23751 emit_label (label);
23752 LABEL_NUSES (label) = 1;
/* No string ops: keep the pointers fixed and track a running OFFSET
   register instead, folded into the addressing modes.  */
23757 rtx offset = force_reg (Pmode, const0_rtx);
23762 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23763 src = change_address (srcmem, SImode, srcptr);
23764 dest = change_address (destmem, SImode, destptr);
23765 emit_move_insn (dest, src);
23766 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23767 true, OPTAB_LIB_WIDEN);
23769 emit_move_insn (offset, tmp);
23770 emit_label (label);
23771 LABEL_NUSES (label) = 1;
23775 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23776 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23777 src = change_address (srcmem, HImode, tmp);
23778 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23779 dest = change_address (destmem, HImode, tmp);
23780 emit_move_insn (dest, src);
23781 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23782 true, OPTAB_LIB_WIDEN);
23784 emit_move_insn (offset, tmp);
23785 emit_label (label);
23786 LABEL_NUSES (label) = 1;
23790 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23791 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23792 src = change_address (srcmem, QImode, tmp);
23793 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23794 dest = change_address (destmem, QImode, tmp);
23795 emit_move_insn (dest, src);
23796 emit_label (label);
23797 LABEL_NUSES (label) = 1;
23802 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23803 with value PROMOTED_VAL.
23804 SRC is passed by pointer to be updated on return.
23805 Return value is updated DST. */
/* Setmem counterpart of emit_memmov: DESTPTR is advanced after each
   stored chunk.  */
23807 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23808 HOST_WIDE_INT size_to_move)
23810 rtx dst = destmem, adjust;
23811 enum insn_code code;
23812 machine_mode move_mode;
23815 /* Find the widest mode in which we could perform moves.
23816 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
23817 it until move of such size is supported. */
/* The store width is dictated by PROMOTED_VAL's mode, narrowed when
   SIZE_TO_MOVE is smaller than that mode.  */
23818 move_mode = GET_MODE (promoted_val);
23819 if (move_mode == VOIDmode)
23820 move_mode = QImode;
23821 if (size_to_move < GET_MODE_SIZE (move_mode))
23823 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23824 promoted_val = gen_lowpart (move_mode, promoted_val);
23826 piece_size = GET_MODE_SIZE (move_mode);
23827 code = optab_handler (mov_optab, move_mode);
23828 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23830 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23832 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23833 gcc_assert (size_to_move % piece_size == 0);
23834 adjust = GEN_INT (piece_size);
23835 for (i = 0; i < size_to_move; i += piece_size)
/* Word-sized or smaller pieces can use the strset pattern, which
   also advances DESTPTR itself.  */
23837 if (piece_size <= GET_MODE_SIZE (word_mode))
23839 emit_insn (gen_strset (destptr, dst, promoted_val))
23840 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
/* Wider (vector) pieces: plain store, then bump DESTPTR manually.  */
23845 emit_insn (GEN_FCN (code) (dst, promoted_val));
23847 emit_move_insn (destptr,
23848 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23850 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23854 /* Update DST rtx. */
23857 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* Masks COUNT down to the remainder and hands it to the generic
   byte-store loop.  */
23859 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23860 rtx count, int max_size)
23863 expand_simple_binop (counter_mode (count), AND, count,
23864 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23865 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23866 gen_lowpart (QImode, value), count, QImode,
23867 1, max_size / 2, true);
23870 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* VALUE is the scalar promoted fill value; VEC_VALUE, when non-NULL,
   is a wider vector promotion used for chunks larger than VALUE's
   mode.  */
23872 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23873 rtx count, int max_size)
/* Constant count: emit one store per set bit of the remainder,
   largest chunk first.  */
23877 if (CONST_INT_P (count))
23879 HOST_WIDE_INT countval = INTVAL (count);
23880 HOST_WIDE_INT epilogue_size = countval % max_size;
23883 /* For now MAX_SIZE should be a power of 2. This assert could be
23884 relaxed, but it'll require a bit more complicated epilogue
23886 gcc_assert ((max_size & (max_size - 1)) == 0);
23887 for (i = max_size; i >= 1; i >>= 1)
23889 if (epilogue_size & i)
23891 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23892 destmem = emit_memset (destmem, destptr, vec_value, i);
23894 destmem = emit_memset (destmem, destptr, value, i);
/* Large variable remainder: use the byte-store loop instead.  */
23901 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Variable remainder: test each bit of COUNT (16, 8, 4, 2, 1) and
   store conditionally under aligntest labels; strset advances
   DESTPTR as it stores.  */
23906 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
/* 64-bit: two DImode stores cover 16 bytes.  */
23909 dest = change_address (destmem, DImode, destptr);
23910 emit_insn (gen_strset (destptr, dest, value));
23911 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23912 emit_insn (gen_strset (destptr, dest, value));
/* 32-bit: four SImode stores cover 16 bytes.  */
23916 dest = change_address (destmem, SImode, destptr);
23917 emit_insn (gen_strset (destptr, dest, value));
23918 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23919 emit_insn (gen_strset (destptr, dest, value));
23920 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23921 emit_insn (gen_strset (destptr, dest, value));
23922 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23923 emit_insn (gen_strset (destptr, dest, value));
23925 emit_label (label);
23926 LABEL_NUSES (label) = 1;
23930 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
23933 dest = change_address (destmem, DImode, destptr);
23934 emit_insn (gen_strset (destptr, dest, value));
23938 dest = change_address (destmem, SImode, destptr);
23939 emit_insn (gen_strset (destptr, dest, value));
23940 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23941 emit_insn (gen_strset (destptr, dest, value));
23943 emit_label (label);
23944 LABEL_NUSES (label) = 1;
23948 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23949 dest = change_address (destmem, SImode, destptr);
23950 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23951 emit_label (label);
23952 LABEL_NUSES (label) = 1;
23956 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23957 dest = change_address (destmem, HImode, destptr);
23958 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23959 emit_label (label);
23960 LABEL_NUSES (label) = 1;
23964 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23965 dest = change_address (destmem, QImode, destptr);
23966 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23967 emit_label (label);
23968 LABEL_NUSES (label) = 1;
23972 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
23973 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
23974 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23976 Return value is updated DESTMEM. */
23978 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23979 rtx destptr, rtx srcptr, rtx value,
23980 rtx vec_value, rtx count, int align,
23981 int desired_alignment, bool issetmem)
/* For each power of two between ALIGN and DESIRED_ALIGNMENT, emit a
   conditional 1/2/4/... byte copy or store that fires only when
   DESTPTR has that bit set, doubling the known alignment each
   step.  */
23984 for (i = 1; i < desired_alignment; i <<= 1)
23988 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
23991 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23992 destmem = emit_memset (destmem, destptr, vec_value, i);
23994 destmem = emit_memset (destmem, destptr, value, i);
23997 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
/* The conditional move/store consumed I bytes of COUNT.  */
23998 ix86_adjust_counter (count, i);
23999 emit_label (label);
24000 LABEL_NUSES (label) = 1;
/* After this step the destination is known to be 2*I aligned.  */
24001 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
24007 /* Test if COUNT&SIZE is nonzero and if so, expand movme
24008 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
24009 and jump to DONE_LABEL. */
/* The trick: emit SIZE bytes from the start of the block and SIZE
   bytes ending exactly at DESTPTR + COUNT; for any COUNT in
   SIZE..2*SIZE-1 the two (possibly overlapping) windows cover the
   whole block.  */
24011 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
24012 rtx destptr, rtx srcptr,
24013 rtx value, rtx vec_value,
24014 rtx count, int size,
24015 rtx done_label, bool issetmem)
24017 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
24018 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
24022 /* If we do not have vector value to copy, we must reduce size. */
24027 if (GET_MODE (value) == VOIDmode && size > 8)
24029 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
24030 mode = GET_MODE (value);
24033 mode = GET_MODE (vec_value), value = vec_value;
24037 /* Choose appropriate vector mode. */
24039 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
24040 else if (size >= 16)
24041 mode = TARGET_SSE ? V16QImode : DImode;
24042 srcmem = change_address (srcmem, mode, srcptr);
24044 destmem = change_address (destmem, mode, destptr);
24045 modesize = GEN_INT (GET_MODE_SIZE (mode));
24046 gcc_assert (GET_MODE_SIZE (mode) <= size);
/* First window: SIZE bytes from the start of the block.  */
24047 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24050 emit_move_insn (destmem, gen_lowpart (mode, value));
24053 emit_move_insn (destmem, srcmem);
24054 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24056 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
/* Second window: rewind to BASE + COUNT - SIZE (the addresses were
   already advanced by SIZE, hence the -2*SIZE offset).  */
24059 destmem = offset_address (destmem, count, 1);
24060 destmem = offset_address (destmem, GEN_INT (-2 * size),
24061 GET_MODE_SIZE (mode));
24064 srcmem = offset_address (srcmem, count, 1);
24065 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
24066 GET_MODE_SIZE (mode));
24068 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24071 emit_move_insn (destmem, gen_lowpart (mode, value));
24074 emit_move_insn (destmem, srcmem);
24075 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24077 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
/* Whole block handled; skip the rest of the expansion.  */
24079 emit_jump_insn (gen_jump (done_label));
24082 emit_label (label);
24083 LABEL_NUSES (label) = 1;
24086 /* Handle small memcpy (up to SIZE that is supposed to be small power of 2.
24087 and get ready for the main memcpy loop by copying iniital DESIRED_ALIGN-ALIGN
24088 bytes and last SIZE bytes adjusitng DESTPTR/SRCPTR/COUNT in a way we can
24089 proceed with an loop copying SIZE bytes at once. Do moves in MODE.
24090 DONE_LABEL is a label after the whole copying sequence. The label is created
24091 on demand if *DONE_LABEL is NULL.
24092 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
24093 bounds after the initial copies.
24095 DESTMEM/SRCMEM are memory expressions pointing to the copies block,
24096 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether
24097 we will dispatch to a library call for large blocks.
24099 In pseudocode we do:
24103 Assume that SIZE is 4. Bigger sizes are handled analogously
24106 copy 4 bytes from SRCPTR to DESTPTR
24107 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
24112 copy 1 byte from SRCPTR to DESTPTR
24115 copy 2 bytes from SRCPTR to DESTPTR
24116 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
24121 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
24122 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
24124 OLD_DESPTR = DESTPTR;
24125 Align DESTPTR up to DESIRED_ALIGN
24126 SRCPTR += DESTPTR - OLD_DESTPTR
24127 COUNT -= DEST_PTR - OLD_DESTPTR
24129 Round COUNT down to multiple of SIZE
24130 << optional caller supplied zero size guard is here >>
24131 << optional caller suppplied dynamic check is here >>
24132 << caller supplied main copy loop is here >>
24137 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24138 rtx *destptr, rtx *srcptr,
24140 rtx value, rtx vec_value,
24142 rtx_code_label **done_label,
24146 unsigned HOST_WIDE_INT *min_size,
24147 bool dynamic_check,
24150 rtx_code_label *loop_label = NULL, *label;
24153 int prolog_size = 0;
24156 /* Chose proper value to copy. */
24157 if (issetmem && VECTOR_MODE_P (mode))
24158 mode_value = vec_value;
24160 mode_value = value;
24161 gcc_assert (GET_MODE_SIZE (mode) <= size);
24163 /* See if block is big or small, handle small blocks. */
24164 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24167 loop_label = gen_label_rtx ();
24170 *done_label = gen_label_rtx ();
24172 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24176 /* Handle sizes > 3. */
24177 for (;size2 > 2; size2 >>= 1)
24178 expand_small_movmem_or_setmem (destmem, srcmem,
24182 size2, *done_label, issetmem);
24183 /* Nothing to copy? Jump to DONE_LABEL if so */
24184 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24187 /* Do a byte copy. */
24188 destmem = change_address (destmem, QImode, *destptr);
24190 emit_move_insn (destmem, gen_lowpart (QImode, value));
24193 srcmem = change_address (srcmem, QImode, *srcptr);
24194 emit_move_insn (destmem, srcmem);
24197 /* Handle sizes 2 and 3. */
24198 label = ix86_expand_aligntest (*count, 2, false);
24199 destmem = change_address (destmem, HImode, *destptr);
24200 destmem = offset_address (destmem, *count, 1);
24201 destmem = offset_address (destmem, GEN_INT (-2), 2);
24203 emit_move_insn (destmem, gen_lowpart (HImode, value));
24206 srcmem = change_address (srcmem, HImode, *srcptr);
24207 srcmem = offset_address (srcmem, *count, 1);
24208 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24209 emit_move_insn (destmem, srcmem);
24212 emit_label (label);
24213 LABEL_NUSES (label) = 1;
24214 emit_jump_insn (gen_jump (*done_label));
24218 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24219 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24221 /* Start memcpy for COUNT >= SIZE. */
24224 emit_label (loop_label);
24225 LABEL_NUSES (loop_label) = 1;
24228 /* Copy first desired_align bytes. */
24230 srcmem = change_address (srcmem, mode, *srcptr);
24231 destmem = change_address (destmem, mode, *destptr);
24232 modesize = GEN_INT (GET_MODE_SIZE (mode));
24233 for (n = 0; prolog_size < desired_align - align; n++)
24236 emit_move_insn (destmem, mode_value);
24239 emit_move_insn (destmem, srcmem);
24240 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24242 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24243 prolog_size += GET_MODE_SIZE (mode);
24247 /* Copy last SIZE bytes. */
24248 destmem = offset_address (destmem, *count, 1);
24249 destmem = offset_address (destmem,
24250 GEN_INT (-size - prolog_size),
24253 emit_move_insn (destmem, mode_value);
24256 srcmem = offset_address (srcmem, *count, 1);
24257 srcmem = offset_address (srcmem,
24258 GEN_INT (-size - prolog_size),
24260 emit_move_insn (destmem, srcmem);
24262 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24264 destmem = offset_address (destmem, modesize, 1);
24266 emit_move_insn (destmem, mode_value);
24269 srcmem = offset_address (srcmem, modesize, 1);
24270 emit_move_insn (destmem, srcmem);
24274 /* Align destination. */
24275 if (desired_align > 1 && desired_align > align)
24277 rtx saveddest = *destptr;
24279 gcc_assert (desired_align <= size);
24280 /* Align destptr up, place it to new register. */
24281 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24282 GEN_INT (prolog_size),
24283 NULL_RTX, 1, OPTAB_DIRECT);
24284 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24285 GEN_INT (-desired_align),
24286 *destptr, 1, OPTAB_DIRECT);
24287 /* See how many bytes we skipped. */
24288 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24290 saveddest, 1, OPTAB_DIRECT);
24291 /* Adjust srcptr and count. */
24293 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
24294 *srcptr, 1, OPTAB_DIRECT);
24295 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24296 saveddest, *count, 1, OPTAB_DIRECT);
24297 /* We copied at most size + prolog_size. */
24298 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24299 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24303 /* Our loops always round down the bock size, but for dispatch to library
24304 we need precise value. */
24306 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
24307 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24311 gcc_assert (prolog_size == 0);
24312 /* Decrease count, so we won't end up copying last word twice. */
24313 if (!CONST_INT_P (*count))
24314 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24315 constm1_rtx, *count, 1, OPTAB_DIRECT);
24317 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24319 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
24324 /* This function is like the previous one, except here we know how many bytes
24325 need to be copied. That allows us to update alignment not only of DST, which
24326 is returned, but also of SRC, which is passed as a pointer for that
/* Alignment prologue for the case where the byte count is a compile-time
   constant: emit ALIGN_BYTES worth of stores/copies in power-of-two pieces,
   then update the alignment and size info of DST (returned) and of *SRCP
   so later passes keep accurate aliasing information.
   NOTE(review): some lines are elided in this extract (loop increment,
   memmov branch selection); comments describe the visible code only.  */
24329 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24330 rtx srcreg, rtx value, rtx vec_value,
24331 int desired_align, int align_bytes,
24335 rtx orig_dst = dst;
24336 rtx orig_src = NULL;
24337 int piece_size = 1;
24338 int copied_bytes = 0;
24342 gcc_assert (srcp != NULL);
24347 /* Emit pieces whose sizes are the set bits of ALIGN_BYTES, smallest
24347 first, until the destination is DESIRED_ALIGN-aligned. */
24347 for (piece_size = 1;
24348 piece_size <= desired_align && copied_bytes < align_bytes;
24351 if (align_bytes & piece_size)
24355 /* For memset prefer the broadcast vector value once the piece is
24355 wider than the scalar promoted value. */
24355 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24356 dst = emit_memset (dst, destreg, vec_value, piece_size);
24358 dst = emit_memset (dst, destreg, value, piece_size);
24361 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24362 copied_bytes += piece_size;
24365 /* Record the alignment we just established on DST. */
24365 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24366 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24367 if (MEM_SIZE_KNOWN_P (orig_dst))
24368 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24372 /* Derive the alignment the source pointer now has, given that both
24372 pointers advanced by the same ALIGN_BYTES. */
24372 int src_align_bytes = get_mem_align_offset (src, desired_align
24374 if (src_align_bytes >= 0)
24375 src_align_bytes = desired_align - src_align_bytes;
24376 if (src_align_bytes >= 0)
24378 unsigned int src_align;
24379 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24381 if ((src_align_bytes & (src_align - 1))
24382 == (align_bytes & (src_align - 1)))
24385 if (src_align > (unsigned int) desired_align)
24386 src_align = desired_align;
24387 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24388 set_mem_align (src, src_align * BITS_PER_UNIT);
24390 if (MEM_SIZE_KNOWN_P (orig_src))
24391 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24398 /* Return true if ALG can be used in current context.
24399 Assume we expand memset if MEMSET is true. */
/* Predicate: can stringop algorithm ALG be used in the current function?
   MEMSET says whether we are expanding memset (true) or memcpy (false).
   NOTE(review): the returns for the no_stringop case and the final
   default are elided in this extract.  */
24401 alg_usable_p (enum stringop_alg alg, bool memset)
24403 if (alg == no_stringop)
24405 /* Vector loops require SSE or AVX support. */
24405 if (alg == vector_loop)
24406 return TARGET_SSE || TARGET_AVX;
24407 /* Algorithms using the rep prefix want at least edi and ecx;
24408 additionally, memset wants eax and memcpy wants esi. Don't
24409 consider such algorithms if the user has appropriated those
24410 registers for their own purposes. */
24411 if (alg == rep_prefix_1_byte
24412 || alg == rep_prefix_4_byte
24413 || alg == rep_prefix_8_byte)
24414 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24415 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24419 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
/* Pick the stringop algorithm for a block of COUNT bytes (or with size in
   [MIN_SIZE, MAX_SIZE] when COUNT is unknown), using the per-CPU cost
   tables.  MEMSET/ZERO_MEMSET describe the operation being expanded.
   On return *DYNAMIC_CHECK is -1, or a size threshold above which a
   runtime dispatch to the library call should be emitted; *NOALIGN is
   set from the chosen cost-table entry.
   NOTE(review): several lines (early returns, loop bodies, braces) are
   elided in this extract; comments describe the visible code only.  */
24420 static enum stringop_alg
24421 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24422 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24423 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24425 const struct stringop_algs * algs;
24426 bool optimize_for_speed;
24428 const struct processor_costs *cost;
24430 bool any_alg_usable_p = false;
24433 *dynamic_check = -1;
24435 /* Even if the string operation call is cold, we still might spend a lot
24436 of time processing large blocks. */
24437 if (optimize_function_for_size_p (cfun)
24438 || (optimize_insn_for_size_p ()
24440 || (expected_size != -1 && expected_size < 256))))
24441 optimize_for_speed = false;
24443 optimize_for_speed = true;
24445 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24447 /* Select the 32-bit or 64-bit cost row for this operation. */
24447 algs = &cost->memset[TARGET_64BIT != 0];
24449 algs = &cost->memcpy[TARGET_64BIT != 0];
24451 /* See maximal size for user defined algorithm. */
24452 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24454 enum stringop_alg candidate = algs->size[i].alg;
24455 bool usable = alg_usable_p (candidate, memset);
24456 any_alg_usable_p |= usable;
24458 if (candidate != libcall && candidate && usable)
24459 max = algs->size[i].max;
24462 /* If expected size is not known but max size is small enough
24463 so inline version is a win, set expected size into
24465 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24466 && expected_size == -1)
24467 expected_size = min_size / 2 + max_size / 2;
24469 /* If user specified the algorithm, honor it if possible. */
24470 if (ix86_stringop_alg != no_stringop
24471 && alg_usable_p (ix86_stringop_alg, memset))
24472 return ix86_stringop_alg;
24473 /* rep; movq or rep; movl is the smallest variant. */
24474 else if (!optimize_for_speed)
24477 /* Byte variant when the count is unknown, not a multiple of 4, or a
24477 memset of a non-zero value (which cannot be widened for free). */
24477 if (!count || (count & 3) || (memset && !zero_memset))
24478 return alg_usable_p (rep_prefix_1_byte, memset)
24479 ? rep_prefix_1_byte : loop_1_byte;
24481 return alg_usable_p (rep_prefix_4_byte, memset)
24482 ? rep_prefix_4_byte : loop;
24484 /* Very tiny blocks are best handled via the loop, REP is expensive to
24486 else if (expected_size != -1 && expected_size < 4)
24487 return loop_1_byte;
24488 else if (expected_size != -1)
24490 enum stringop_alg alg = libcall;
24491 bool alg_noalign = false;
24492 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24494 /* We get here if the algorithms that were not libcall-based
24495 were rep-prefix based and we are unable to use rep prefixes
24496 based on global register usage. Break out of the loop and
24497 use the heuristic below. */
24498 if (algs->size[i].max == 0)
24500 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24502 enum stringop_alg candidate = algs->size[i].alg;
24504 if (candidate != libcall && alg_usable_p (candidate, memset))
24507 alg_noalign = algs->size[i].noalign;
24509 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24510 last non-libcall inline algorithm. */
24511 if (TARGET_INLINE_ALL_STRINGOPS)
24513 /* When the current size is best to be copied by a libcall,
24514 but we are still forced to inline, run the heuristic below
24515 that will pick code for medium sized blocks. */
24516 if (alg != libcall)
24518 *noalign = alg_noalign;
24521 else if (!any_alg_usable_p)
24524 else if (alg_usable_p (candidate, memset))
24526 *noalign = algs->size[i].noalign;
24532 /* When asked to inline the call anyway, try to pick meaningful choice.
24533 We look for maximal size of block that is faster to copy by hand and
24534 take blocks of at most of that size guessing that average size will
24535 be roughly half of the block.
24537 If this turns out to be bad, we might simply specify the preferred
24538 choice in ix86_costs. */
24539 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24540 && (algs->unknown_size == libcall
24541 || !alg_usable_p (algs->unknown_size, memset)))
24543 enum stringop_alg alg;
24545 /* If there aren't any usable algorithms, then recursing on
24546 smaller sizes isn't going to find anything. Just return the
24547 simple byte-at-a-time copy loop. */
24548 if (!any_alg_usable_p)
24550 /* Pick something reasonable. */
24551 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24552 *dynamic_check = 128;
24553 return loop_1_byte;
24557 /* Recurse with EXPECTED_SIZE = MAX/2 to pick the alg for the inline
24557 path; the recursive call must not itself request a dynamic check. */
24557 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24558 zero_memset, dynamic_check, noalign);
24559 gcc_assert (*dynamic_check == -1);
24560 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24561 *dynamic_check = max;
24563 gcc_assert (alg != libcall);
24566 return (alg_usable_p (algs->unknown_size, memset)
24567 ? algs->unknown_size : libcall);
24570 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24571 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
/* Return the destination alignment to aim for, given the incoming ALIGN,
   the chosen algorithm ALG and its MOVE_MODE.  Defaults to the move-mode
   size; never below ALIGN, and reduced back to ALIGN for very small
   expected sizes where the alignment prologue would not pay off.
   NOTE(review): the libcall early-return body and the PentiumPro branch
   body are elided in this extract.  */
24573 decide_alignment (int align,
24574 enum stringop_alg alg,
24576 machine_mode move_mode)
24578 int desired_align = 0;
24580 gcc_assert (alg != no_stringop);
24582 if (alg == libcall)
24584 if (move_mode == VOIDmode)
24587 desired_align = GET_MODE_SIZE (move_mode);
24588 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
24589 copying whole cacheline at once. */
24590 if (TARGET_PENTIUMPRO
24591 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24596 if (desired_align < align)
24597 desired_align = align;
24598 /* For tiny blocks the alignment prologue costs more than it saves. */
24598 if (expected_size != -1 && expected_size < 4)
24599 desired_align = align;
24601 return desired_align;
24605 /* Helper function for memcpy. For QImode value 0xXY produce
24606 0xXYXYXYXY of wide specified by MODE. This is essentially
24607 a * 0x10101010, but we can do slightly better than
24608 synth_mult by unwinding the sequence by hand on CPUs with
/* Broadcast the QImode value VAL into every byte of a MODE-wide register:
   constants are folded at compile time, otherwise either a multiply by
   0x0101... or a shift/or (or insv) ladder is used, whichever the cost
   tables say is cheaper.  */
24611 promote_duplicated_reg (machine_mode mode, rtx val)
24613 machine_mode valmode = GET_MODE (val);
24615 int nops = mode == DImode ? 3 : 2;
24617 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24618 if (val == const0_rtx)
24619 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24620 /* Compile-time constant: replicate the low byte directly. */
24620 if (CONST_INT_P (val))
24622 HOST_WIDE_INT v = INTVAL (val) & 255;
24626 if (mode == DImode)
24627 v |= (v << 16) << 16;
24628 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24631 if (valmode == VOIDmode)
24633 if (valmode != QImode)
24634 val = gen_lowpart (QImode, val);
24635 if (mode == QImode)
24637 if (!TARGET_PARTIAL_REG_STALL)
24639 /* Multiply path: cheaper when mult cost beats the shift/or ladder. */
24639 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24640 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24641 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24642 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24644 rtx reg = convert_modes (mode, QImode, val, true);
24645 tmp = promote_duplicated_reg (mode, const1_rtx);
24646 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24651 rtx reg = convert_modes (mode, QImode, val, true);
24653 /* Shift/or ladder: double the replicated width each step (8, 16, 32). */
24653 if (!TARGET_PARTIAL_REG_STALL)
24654 if (mode == SImode)
24655 emit_insn (gen_movsi_insv_1 (reg, reg));
24657 emit_insn (gen_movdi_insv_1 (reg, reg));
24660 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24661 NULL, 1, OPTAB_DIRECT);
24663 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24665 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24666 NULL, 1, OPTAB_DIRECT);
24667 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24668 /* NOTE(review): a 32-bit shift under an SImode guard is out of range
24668 for SImode; upstream GCC guards this step with mode == DImode —
24668 verify this line against the original source. */
24668 if (mode == SImode)
24670 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24671 NULL, 1, OPTAB_DIRECT);
24672 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24677 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
24678 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
24679 alignment from ALIGN to DESIRED_ALIGN. */
/* Broadcast VAL into the widest register the expansion will need: DImode,
   SImode or HImode depending on the main-loop chunk size (SIZE_NEEDED) and
   on how many prologue bytes the DESIRED_ALIGN/ALIGN gap may require;
   otherwise VAL is returned unchanged.  */
24681 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24687 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24688 promoted_val = promote_duplicated_reg (DImode, val);
24689 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24690 promoted_val = promote_duplicated_reg (SImode, val);
24691 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24692 promoted_val = promote_duplicated_reg (HImode, val);
24694 promoted_val = val;
24696 return promoted_val;
24699 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24700 operations when profitable. The code depends upon architecture, block size
24701 and alignment, but always has one of the following overall structures:
24703 Aligned move sequence:
24705 1) Prologue guard: Conditional that jumps up to epilogues for small
24706 blocks that can be handled by epilogue alone. This is faster
24707 but also needed for correctness, since prologue assume the block
24708 is larger than the desired alignment.
24710 Optional dynamic check for size and libcall for large
24711 blocks is emitted here too, with -minline-stringops-dynamically.
24713 2) Prologue: copy first few bytes in order to get destination
24714 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24715 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24716 copied. We emit either a jump tree on power of two sized
24717 blocks, or a byte loop.
24719 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24720 with specified algorithm.
24722 4) Epilogue: code copying tail of the block that is too small to be
24723 handled by main body (or up to size guarded by prologue guard).
24725 Misaligned move sequence
24727 1) misaligned move prologue/epilogue containing:
24728 a) Prologue handling small memory blocks and jumping to done_label
24729 (skipped if blocks are known to be large enough)
24730 b) Single move copying first DESIRED_ALIGN-ALIGN bytes if alignment is
24731 needed by single possibly misaligned move
24732 (skipped if alignment is not needed)
24733 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24735 2) Zero size guard dispatching to done_label, if needed
24737 3) dispatch to library call, if needed,
24739 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24740 with specified algorithm. */
/* Top-level expander for memcpy (ISSETMEM false) and memset (ISSETMEM
   true), implementing the four-step structure described in the comment
   above: decide algorithm/alignment, emit prologue guard and alignment
   prologue, emit the main loop, emit the epilogue.  Size hints arrive as
   the *_EXP rtx parameters.
   NOTE(review): many lines (returns, braces, switch heads, else arms)
   are elided in this extract; comments below describe visible code only.  */
24742 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24743 rtx align_exp, rtx expected_align_exp,
24744 rtx expected_size_exp, rtx min_size_exp,
24745 rtx max_size_exp, rtx probable_max_size_exp,
24750 rtx_code_label *label = NULL;
24752 rtx_code_label *jump_around_label = NULL;
24753 HOST_WIDE_INT align = 1;
24754 unsigned HOST_WIDE_INT count = 0;
24755 HOST_WIDE_INT expected_size = -1;
24756 int size_needed = 0, epilogue_size_needed;
24757 int desired_align = 0, align_bytes = 0;
24758 enum stringop_alg alg;
24759 rtx promoted_val = NULL;
24760 rtx vec_promoted_val = NULL;
24761 bool force_loopy_epilogue = false;
24763 bool need_zero_guard = false;
24765 machine_mode move_mode = VOIDmode;
24766 int unroll_factor = 1;
24767 /* TODO: Once value ranges are available, fill in proper data. */
24768 unsigned HOST_WIDE_INT min_size = 0;
24769 unsigned HOST_WIDE_INT max_size = -1;
24770 unsigned HOST_WIDE_INT probable_max_size = -1;
24771 bool misaligned_prologue_used = false;
24773 /* Gather alignment and size knowledge from the hint operands. */
24773 if (CONST_INT_P (align_exp))
24774 align = INTVAL (align_exp);
24775 /* i386 can do misaligned access on reasonably increased cost. */
24776 if (CONST_INT_P (expected_align_exp)
24777 && INTVAL (expected_align_exp) > align)
24778 align = INTVAL (expected_align_exp);
24779 /* ALIGN is the minimum of destination and source alignment, but we care here
24780 just about destination alignment. */
24782 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24783 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24785 if (CONST_INT_P (count_exp))
24787 min_size = max_size = probable_max_size = count = expected_size
24788 = INTVAL (count_exp);
24789 /* When COUNT is 0, there is nothing to do. */
24796 min_size = INTVAL (min_size_exp);
24798 max_size = INTVAL (max_size_exp);
24799 if (probable_max_size_exp)
24800 probable_max_size = INTVAL (probable_max_size_exp);
24801 if (CONST_INT_P (expected_size_exp))
24802 expected_size = INTVAL (expected_size_exp);
24805 /* Make sure we don't need to care about overflow later on. */
24806 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24809 /* Step 0: Decide on preferred algorithm, desired alignment and
24810 size of chunks to be copied by main loop. */
24811 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24813 issetmem && val_exp == const0_rtx,
24814 &dynamic_check, &noalign);
24815 if (alg == libcall)
24817 gcc_assert (alg != no_stringop);
24819 /* For now vector-version of memset is generated only for memory zeroing, as
24820 creating of promoted vector value is very cheap in this case. */
24821 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24822 alg = unrolled_loop;
24825 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24826 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24828 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24831 /* Per-algorithm setup of MOVE_MODE, UNROLL_FACTOR and the zero guard. */
24831 move_mode = word_mode;
24837 gcc_unreachable ();
24839 need_zero_guard = true;
24840 move_mode = QImode;
24843 need_zero_guard = true;
24845 case unrolled_loop:
24846 need_zero_guard = true;
24847 unroll_factor = (TARGET_64BIT ? 4 : 2);
24850 need_zero_guard = true;
24852 /* Find the widest supported mode. */
24853 move_mode = word_mode;
24854 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24855 != CODE_FOR_nothing)
24856 move_mode = GET_MODE_WIDER_MODE (move_mode);
24858 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24859 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24860 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24862 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24863 move_mode = mode_for_vector (word_mode, nunits);
24864 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24865 move_mode = word_mode;
24867 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24869 case rep_prefix_8_byte:
24870 move_mode = DImode;
24872 case rep_prefix_4_byte:
24873 move_mode = SImode;
24875 case rep_prefix_1_byte:
24876 move_mode = QImode;
24879 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24880 epilogue_size_needed = size_needed;
24882 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24883 if (!TARGET_ALIGN_STRINGOPS || noalign)
24884 align = desired_align;
24886 /* Step 1: Prologue guard. */
24888 /* Alignment code needs count to be in register. */
24889 if (CONST_INT_P (count_exp) && desired_align > align)
24891 if (INTVAL (count_exp) > desired_align
24892 && INTVAL (count_exp) > size_needed)
24895 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24896 if (align_bytes <= 0)
24899 align_bytes = desired_align - align_bytes;
24901 if (align_bytes == 0)
24902 count_exp = force_reg (counter_mode (count_exp), count_exp);
24904 gcc_assert (desired_align >= 1 && align >= 1);
24906 /* Misaligned move sequences handle both prologue and epilogue at once.
24907 Default code generation results in a smaller code for large alignments
24908 and also avoids redundant job when sizes are known precisely. */
24909 misaligned_prologue_used
24910 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24911 && MAX (desired_align, epilogue_size_needed) <= 32
24912 && desired_align <= epilogue_size_needed
24913 && ((desired_align > align && !align_bytes)
24914 || (!count && epilogue_size_needed > 1)));
24916 /* Do the cheap promotion to allow better CSE across the
24917 main loop and epilogue (ie one load of the big constant in the
24919 For now the misaligned move sequences do not have fast path
24920 without broadcasting. */
24921 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24923 if (alg == vector_loop)
24925 gcc_assert (val_exp == const0_rtx);
24926 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24927 promoted_val = promote_duplicated_reg_to_size (val_exp,
24928 GET_MODE_SIZE (word_mode),
24929 desired_align, align);
24933 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24934 desired_align, align);
24937 /* Misaligned move sequences handle both prologues and epilogues at once.
24938 Default code generation results in smaller code for large alignments and
24939 also avoids redundant job when sizes are known precisely. */
24940 if (misaligned_prologue_used)
24942 /* Misaligned move prologue handles small blocks by itself. */
24943 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24944 (dst, src, &destreg, &srcreg,
24945 move_mode, promoted_val, vec_promoted_val,
24947 &jump_around_label,
24948 desired_align < align
24949 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24950 desired_align, align, &min_size, dynamic_check, issetmem);
24952 src = change_address (src, BLKmode, srcreg);
24953 dst = change_address (dst, BLKmode, destreg);
24954 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24955 epilogue_size_needed = 0;
24956 if (need_zero_guard && !min_size)
24958 /* It is possible that we copied enough so the main loop will not
24960 gcc_assert (size_needed > 1);
24961 if (jump_around_label == NULL_RTX)
24962 jump_around_label = gen_label_rtx ();
24963 emit_cmp_and_jump_insns (count_exp,
24964 GEN_INT (size_needed),
24965 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24966 if (expected_size == -1
24967 || expected_size < (desired_align - align) / 2 + size_needed)
24968 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24970 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24973 /* Ensure that alignment prologue won't copy past end of block. */
24974 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24976 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24977 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24978 Make sure it is power of 2. */
24979 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24981 /* To improve performance of small blocks, we jump around the VAL
24982 promoting mode. This means that if the promoted VAL is not constant,
24983 we might not use it in the epilogue and have to use byte
24985 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24986 force_loopy_epilogue = true;
24987 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24988 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24990 /* If main algorithm works on QImode, no epilogue is needed.
24991 For small sizes just don't align anything. */
24992 if (size_needed == 1)
24993 desired_align = align;
24998 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25000 label = gen_label_rtx ();
25001 emit_cmp_and_jump_insns (count_exp,
25002 GEN_INT (epilogue_size_needed),
25003 LTU, 0, counter_mode (count_exp), 1, label);
25004 if (expected_size == -1 || expected_size < epilogue_size_needed)
25005 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25007 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25011 /* Emit code to decide on runtime whether library call or inline should be
25013 if (dynamic_check != -1)
25015 if (!issetmem && CONST_INT_P (count_exp))
25017 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
25019 emit_block_move_via_libcall (dst, src, count_exp, false);
25020 count_exp = const0_rtx;
25026 rtx_code_label *hot_label = gen_label_rtx ();
25027 if (jump_around_label == NULL_RTX)
25028 jump_around_label = gen_label_rtx ();
25029 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
25030 LEU, 0, counter_mode (count_exp),
25032 predict_jump (REG_BR_PROB_BASE * 90 / 100);
25034 set_storage_via_libcall (dst, count_exp, val_exp, false);
25036 emit_block_move_via_libcall (dst, src, count_exp, false);
25037 emit_jump (jump_around_label);
25038 emit_label (hot_label);
25042 /* Step 2: Alignment prologue. */
25043 /* Do the expensive promotion once we branched off the small blocks. */
25044 if (issetmem && !promoted_val)
25045 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25046 desired_align, align);
25048 if (desired_align > align && !misaligned_prologue_used)
25050 if (align_bytes == 0)
25052 /* Except for the first move in prologue, we no longer know
25053 constant offset in aliasing info. It doesn't seem worth
25054 the pain to maintain it for the first move, so throw away
25056 dst = change_address (dst, BLKmode, destreg);
25058 src = change_address (src, BLKmode, srcreg);
25059 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
25060 promoted_val, vec_promoted_val,
25061 count_exp, align, desired_align,
25063 /* At most desired_align - align bytes are copied. */
25064 if (min_size < (unsigned)(desired_align - align))
25067 min_size -= desired_align - align;
25071 /* If we know how many bytes need to be stored before dst is
25072 sufficiently aligned, maintain aliasing info accurately. */
25073 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
25081 count_exp = plus_constant (counter_mode (count_exp),
25082 count_exp, -align_bytes);
25083 count -= align_bytes;
25084 min_size -= align_bytes;
25085 max_size -= align_bytes;
25087 if (need_zero_guard
25089 && (count < (unsigned HOST_WIDE_INT) size_needed
25090 || (align_bytes == 0
25091 && count < ((unsigned HOST_WIDE_INT) size_needed
25092 + desired_align - align))))
25094 /* It is possible that we copied enough so the main loop will not
25096 gcc_assert (size_needed > 1);
25097 if (label == NULL_RTX)
25098 label = gen_label_rtx ();
25099 emit_cmp_and_jump_insns (count_exp,
25100 GEN_INT (size_needed),
25101 LTU, 0, counter_mode (count_exp), 1, label);
25102 if (expected_size == -1
25103 || expected_size < (desired_align - align) / 2 + size_needed)
25104 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25106 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25109 if (label && size_needed == 1)
25111 emit_label (label);
25112 LABEL_NUSES (label) = 1;
25114 epilogue_size_needed = 1;
25116 promoted_val = val_exp;
25118 else if (label == NULL_RTX && !misaligned_prologue_used)
25119 epilogue_size_needed = size_needed;
25121 /* Step 3: Main loop. */
25128 gcc_unreachable ();
25131 case unrolled_loop:
25132 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25133 count_exp, move_mode, unroll_factor,
25134 expected_size, issetmem);
25137 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25138 vec_promoted_val, count_exp, move_mode,
25139 unroll_factor, expected_size, issetmem);
25141 case rep_prefix_8_byte:
25142 case rep_prefix_4_byte:
25143 case rep_prefix_1_byte:
25144 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25145 val_exp, count_exp, move_mode, issetmem);
25148 /* Adjust properly the offset of src and dest memory for aliasing. */
25149 if (CONST_INT_P (count_exp))
25152 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25153 (count / size_needed) * size_needed);
25154 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25155 (count / size_needed) * size_needed);
25160 src = change_address (src, BLKmode, srcreg);
25161 dst = change_address (dst, BLKmode, destreg);
25164 /* Step 4: Epilogue to copy the remaining bytes. */
25168 /* When the main loop is done, COUNT_EXP might hold original count,
25169 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
25170 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
25171 bytes. Compensate if needed. */
25173 if (size_needed < epilogue_size_needed)
25176 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25177 GEN_INT (size_needed - 1), count_exp, 1,
25179 if (tmp != count_exp)
25180 emit_move_insn (count_exp, tmp);
25182 emit_label (label);
25183 LABEL_NUSES (label) = 1;
25186 if (count_exp != const0_rtx && epilogue_size_needed > 1)
25188 if (force_loopy_epilogue)
25189 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25190 epilogue_size_needed);
25194 expand_setmem_epilogue (dst, destreg, promoted_val,
25195 vec_promoted_val, count_exp,
25196 epilogue_size_needed);
25198 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25199 epilogue_size_needed);
25202 if (jump_around_label)
25203 emit_label (jump_around_label);
/* NOTE(review): this chunk is an extraction fragment -- interior source
   lines are missing and each surviving line carries a stray leading
   line-number token.  Code below is left byte-identical; only comments
   were added.

   Emits the unrolled strlen body: first a byte-at-a-time prologue that
   advances OUT to a 4-byte boundary, then a word loop that tests four
   bytes per iteration with the classic "(x - 0x01010101) & ~x & 0x80808080"
   zero-byte trick, then a branchless fixup that backs OUT up to the
   exact NUL position.  */
25208 /* Expand the appropriate insns for doing strlen if not just doing
25211 out = result, initialized with the start address
25212 align_rtx = alignment of the address.
25213 scratch = scratch register, initialized with the startaddress when
25214 not aligned, otherwise undefined
25216 This is just the body. It needs the initializations mentioned above and
25217 some address computing at the end. These things are done in i386.md. */
25220 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25224 rtx_code_label *align_2_label = NULL;
25225 rtx_code_label *align_3_label = NULL;
25226 rtx_code_label *align_4_label = gen_label_rtx ();
25227 rtx_code_label *end_0_label = gen_label_rtx ();
25229 rtx tmpreg = gen_reg_rtx (SImode);
25230 rtx scratch = gen_reg_rtx (SImode);
/* A non-constant ALIGN_RTX leaves the alignment unknown; the prologue
   below then tests the low address bits at runtime.  */
25234 if (CONST_INT_P (align_rtx))
25235 align = INTVAL (align_rtx);
25237 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
25239 /* Is there a known alignment and is it less than 4? */
25242 rtx scratch1 = gen_reg_rtx (Pmode);
25243 emit_move_insn (scratch1, out);
25244 /* Is there a known alignment and is it not 2? */
25247 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
25248 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
25250 /* Leave just the 3 lower bits. */
25251 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25252 NULL_RTX, 0, OPTAB_WIDEN)
25254 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25255 Pmode, 1, align_4_label);
25256 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25257 Pmode, 1, align_2_label);
25258 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25259 Pmode, 1, align_3_label);
25263 /* Since the alignment is 2, we have to check 2 or 0 bytes;
25264 check if is aligned to 4 - byte. */
25266 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25267 NULL_RTX, 0, OPTAB_WIDEN);
25269 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25270 Pmode, 1, align_4_label);
25273 mem = change_address (src, QImode, out);
25275 /* Now compare the bytes. */
25277 /* Compare the first n unaligned byte on a byte per byte basis. */
25278 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25279 QImode, 1, end_0_label);
25281 /* Increment the address. */
25282 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25284 /* Not needed with an alignment of 2 */
25287 emit_label (align_2_label);
25289 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25292 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25294 emit_label (align_3_label);
25297 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25300 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25303 /* Generate loop to check 4 bytes at a time. It is not a good idea to
25304 align this loop. It gives only huge programs, but does not help to
25306 emit_label (align_4_label);
25308 mem = change_address (src, SImode, out);
25309 emit_move_insn (scratch, mem);
/* OUT is advanced past the word before the zero-byte test; the fixup
   at end_0_label subtracts the over-advance.  */
25310 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25312 /* This formula yields a nonzero result iff one of the bytes is zero.
25313 This saves three branches inside loop and many cycles. */
25315 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25316 emit_insn (gen_one_cmplsi2 (scratch, scratch));
25317 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25318 emit_insn (gen_andsi3 (tmpreg, tmpreg,
25319 gen_int_mode (0x80808080, SImode)));
25320 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* 64-bit path: locate the zero byte with conditional moves instead of
   branches (presumably guarded by TARGET_64BIT/TARGET_CMOVE in the
   missing lines -- confirm against full source).  */
25325 rtx reg = gen_reg_rtx (SImode);
25326 rtx reg2 = gen_reg_rtx (Pmode);
25327 emit_move_insn (reg, tmpreg);
25328 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25330 /* If zero is not in the first two bytes, move two bytes forward. */
25331 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25332 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25333 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25334 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
25335 gen_rtx_IF_THEN_ELSE (SImode, tmp,
25338 /* Emit lea manually to avoid clobbering of flags. */
25339 emit_insn (gen_rtx_SET (SImode, reg2,
25340 gen_rtx_PLUS (Pmode, out, const2_rtx)));
25342 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25343 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25344 emit_insn (gen_rtx_SET (VOIDmode, out,
25345 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Branchy fallback for targets without the cmov path above.  */
25351 rtx_code_label *end_2_label = gen_label_rtx ();
25352 /* Is zero in the first two bytes? */
25354 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25355 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25356 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25357 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25358 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25360 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
25361 JUMP_LABEL (tmp) = end_2_label;
25363 /* Not in the first two. Move two bytes forward. */
25364 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25365 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25367 emit_label (end_2_label);
25371 /* Avoid branch in fixing the byte. */
25372 tmpreg = gen_lowpart (QImode, tmpreg);
/* add sets carry from bit 7 of the mask byte; sbb then subtracts 3 or 4
   from OUT without a branch.  */
25373 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
25374 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25375 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25376 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
25378 emit_label (end_0_label);
/* NOTE(review): extraction fragment -- interior lines missing, stray
   line-number prefixes retained.  Code unchanged; comments only.

   Top-level strlen expander.  Chooses between the unrolled word-at-a-time
   body (ix86_expand_strlensi_unroll_1) and the repnz-scasb UNSPEC_SCAS
   sequence, depending on tuning flags and alignment.  */
25381 /* Expand strlen. */
25384 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25386 rtx addr, scratch1, scratch2, scratch3, scratch4;
25388 /* The generic case of strlen expander is long. Avoid it's
25389 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
/* Unrolled path only applies when searching for NUL (eoschar == 0) and
   not optimizing for size.  */
25391 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25392 && !TARGET_INLINE_ALL_STRINGOPS
25393 && !optimize_insn_for_size_p ()
25394 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25397 addr = force_reg (Pmode, XEXP (src, 0));
25398 scratch1 = gen_reg_rtx (Pmode);
25400 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25401 && !optimize_insn_for_size_p ())
25403 /* Well it seems that some optimizer does not combine a call like
25404 foo(strlen(bar), strlen(bar));
25405 when the move and the subtraction is done here. It does calculate
25406 the length just once when these instructions are done inside of
25407 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
25408 often used and I use one fewer register for the lifetime of
25409 output_strlen_unroll() this is better. */
25411 emit_move_insn (out, addr);
25413 ix86_expand_strlensi_unroll_1 (out, src, align);
25415 /* strlensi_unroll_1 returns the address of the zero at the end of
25416 the string, like memchr(), so compute the length by subtracting
25417 the start address. */
25418 emit_insn (ix86_gen_sub3 (out, out, addr));
/* scasb path: requires eax (char), ecx (count) and edi (pointer).  */
25424 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25425 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25428 scratch2 = gen_reg_rtx (Pmode);
25429 scratch3 = gen_reg_rtx (Pmode);
25430 scratch4 = force_reg (Pmode, constm1_rtx);
25432 emit_move_insn (scratch3, addr);
25433 eoschar = force_reg (QImode, eoschar);
25435 src = replace_equiv_address_nv (src, scratch3);
25437 /* If .md starts supporting :P, this can be done in .md. */
25438 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25439 scratch4), UNSPEC_SCAS);
25440 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* length = ~scratch1 - 1 (scasb leaves a negated count).  */
25441 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25442 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
/* NOTE(review): extraction fragment; code unchanged, comments only.
   Materializes PLTOFF(symbol) + PIC register into a fresh pseudo --
   only valid in the large x86-64 PIC code model (asserted below).  */
25447 /* For given symbol (function) construct code to compute address of it's PLT
25448 entry in large x86-64 PIC model. */
25450 construct_plt_address (rtx symbol)
25454 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25455 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25456 gcc_assert (Pmode == DImode);
25458 tmp = gen_reg_rtx (Pmode);
25459 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25461 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25462 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
/* NOTE(review): extraction fragment -- many interior lines missing.
   Code unchanged; comments only.

   Emits a call (or sibcall) insn: legitimizes the call target (Mach-O
   indirection, large-PIC PLT address, forcing the address into a
   register when the operand predicates reject it), records register
   USEs/CLOBBERs, wraps value + stack-pop + call into a PARALLEL, and
   returns the emitted call insn.  */
25467 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25469 rtx pop, bool sibcall)
25472 rtx use = NULL, call;
25473 unsigned int vec_len = 0;
25475 if (pop == const0_rtx)
/* Stack-popping calls (ret $N) only exist in 32-bit ABIs.  */
25477 gcc_assert (!TARGET_64BIT || !pop);
25479 if (TARGET_MACHO && !TARGET_64BIT)
25482 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25483 fnaddr = machopic_indirect_call_target (fnaddr);
25488 /* Static functions and indirect calls don't need the pic register. */
25491 || (ix86_cmodel == CM_LARGE_PIC
25492 && DEFAULT_ABI != MS_ABI))
25493 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25494 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
25496 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
/* With a pseudo PIC register the hard register is not live; copy the
   pseudo into it so the USE above is satisfied at the call.  */
25497 if (ix86_use_pseudo_pic_reg ())
25498 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25499 pic_offset_table_rtx)
25503 /* Skip setting up RAX register for -mskip-rax-setup when there are no
25504 parameters passed in vector registers. */
25506 && (INTVAL (callarg2) > 0
25507 || (INTVAL (callarg2) == 0
25508 && (TARGET_SSE || !flag_skip_rax_setup))))
/* AL holds the number of SSE registers used by a varargs call
   (SysV x86-64 ABI).  */
25510 rtx al = gen_rtx_REG (QImode, AX_REG);
25511 emit_move_insn (al, callarg2);
25512 use_reg (&use, al);
25515 if (ix86_cmodel == CM_LARGE_PIC
25518 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25519 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25520 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
/* Force the address into a register when the insn predicate would
   reject the raw operand.  */
25522 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25523 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25525 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25526 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25529 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25533 /* We should add bounds as destination register in case
25534 pointer with bounds may be returned. */
25535 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25537 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25538 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25539 retval = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, retval, b0, b1));
25540 chkp_put_regs_to_expr_list (retval);
25543 call = gen_rtx_SET (VOIDmode, retval, call);
25545 vec[vec_len++] = call;
/* Fold the callee's stack adjustment (ret $N) into the call pattern.  */
25549 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25550 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
25551 vec[vec_len++] = pop;
/* MS ABI calls clobber extra registers relative to SysV; callarg2 == -2
   marks calls that are exempt (presumably ms_hook/thunks -- confirm).  */
25554 if (TARGET_64BIT_MS_ABI
25555 && (!callarg2 || INTVAL (callarg2) != -2))
25557 int const cregs_size
25558 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25561 for (i = 0; i < cregs_size; i++)
25563 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25564 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25566 clobber_reg (&use, gen_rtx_REG (mode, regno));
25571 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25572 call = emit_call_insn (call);
25574 CALL_INSN_FUNCTION_USAGE (call) = use;
/* NOTE(review): extraction fragment; code unchanged, comments only.
   Prints the assembly for a call/sibcall, choosing direct (%P0) vs
   indirect (%A0) templates, and decides whether SEH unwinding needs a
   trailing nop after the call.  */
25579 /* Output the assembly for a call instruction. */
25582 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25584 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25585 bool seh_nop_p = false;
25588 if (SIBLING_CALL_P (insn))
25591 xasm = "%!jmp\t%P0";
25592 /* SEH epilogue detection requires the indirect branch case
25593 to include REX.W. */
25594 else if (TARGET_SEH)
25595 xasm = "%!rex.W jmp %A0";
25597 xasm = "%!jmp\t%A0";
25599 output_asm_insn (xasm, &call_op);
25603 /* SEH unwinding can require an extra nop to be emitted in several
25604 circumstances. Determine if we have one of those. */
/* Scan forward: a real insn after the call means the nop is redundant.  */
25609 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25611 /* If we get to another real insn, we don't need the nop. */
25615 /* If we get to the epilogue note, prevent a catch region from
25616 being adjacent to the standard epilogue sequence. If non-
25617 call-exceptions, we'll have done this during epilogue emission. */
25618 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25619 && !flag_non_call_exceptions
25620 && !can_throw_internal (insn))
25627 /* If we didn't find a real insn following the call, prevent the
25628 unwinder from looking into the next function. */
25634 xasm = "%!call\t%P0";
25636 xasm = "%!call\t%A0";
25638 output_asm_insn (xasm, &call_op);
/* NOTE(review): extraction fragment; code unchanged, comments only.
   Per-function machine_function allocator, installed via INIT_EXPANDERS;
   zero-initialized, with the fast-prologue cache marked "unknown" (-1)
   and the ABI snapshot taken from the global ix86_abi.  */
25646 /* Clear stack slot assignments remembered from previous functions.
25647 This is called from INIT_EXPANDERS once before RTL is emitted for each
25650 static struct machine_function *
25651 ix86_init_machine_status (void)
25653 struct machine_function *f;
25655 f = ggc_cleared_alloc<machine_function> ();
25656 f->use_fast_prologue_epilogue_nregs = -1;
25657 f->call_abi = ix86_abi;
/* NOTE(review): extraction fragment; code unchanged, comments only.
   Returns a (cached) stack slot MEM for (MODE, N); slots are memoized
   per function in the ix86_stack_locals list so repeated requests share
   one slot.  copy_rtx + validize_mem give each caller an independent,
   address-valid MEM.  */
25662 /* Return a MEM corresponding to a stack slot with mode MODE.
25663 Allocate a new slot if necessary.
25665 The RTL for a function can have several slots available: N is
25666 which slot to use. */
25669 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25671 struct stack_local_entry *s;
25673 gcc_assert (n < MAX_386_STACK_LOCALS);
25675 for (s = ix86_stack_locals; s; s = s->next)
25676 if (s->mode == mode && s->n == n)
25677 return validize_mem (copy_rtx (s->rtl));
25679 s = ggc_alloc<stack_local_entry> ();
25682 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25684 s->next = ix86_stack_locals;
25685 ix86_stack_locals = s;
25686 return validize_mem (copy_rtx (s->rtl));
/* NOTE(review): extraction fragment; code unchanged, comments only.
   Re-instantiates virtual registers inside every cached stack-slot RTL
   (TARGET_INSTANTIATE_DECLS hook).  */
25690 ix86_instantiate_decls (void)
25692 struct stack_local_entry *s;
25694 for (s = ix86_stack_locals; s; s = s->next)
25695 if (s->rtl != NULL_RTX)
25696 instantiate_decl_rtl (s->rtl);
/* NOTE(review): extraction fragment; code unchanged, comments only.
   True when a decomposed address is displacement-only (no base/index)
   and that displacement is a label, a non-TLS symbol, or one of the
   PC-relative unspecs -- i.e. it will be emitted as disp(%rip).  */
25699 /* Check whether x86 address PARTS is a pc-relative address. */
25702 rip_relative_addr_p (struct ix86_address *parts)
25704 rtx base, index, disp;
25706 base = parts->base;
25707 index = parts->index;
25708 disp = parts->disp;
25710 if (disp && !base && !index)
/* Strip CONST and a symbol+offset PLUS wrapper before classifying.  */
25716 if (GET_CODE (disp) == CONST)
25717 symbol = XEXP (disp, 0);
25718 if (GET_CODE (symbol) == PLUS
25719 && CONST_INT_P (XEXP (symbol, 1)))
25720 symbol = XEXP (symbol, 0);
25722 if (GET_CODE (symbol) == LABEL_REF
25723 || (GET_CODE (symbol) == SYMBOL_REF
25724 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25725 || (GET_CODE (symbol) == UNSPEC
25726 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25727 || XINT (symbol, 1) == UNSPEC_PCREL
25728 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
/* NOTE(review): extraction fragment -- interior lines missing.  Code
   unchanged; comments only.

   Computes the encoded length (segment/addr32 prefixes + SIB +
   displacement bytes; modrm and opcode excluded) of memory address ADDR.
   LEA is special-cased because lea never needs the addr32 prefix.  */
25735 /* Calculate the length of the memory address in the instruction encoding.
25736 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25737 or other prefixes. We never generate addr32 prefix for LEA insn. */
25740 memory_address_length (rtx addr, bool lea)
25742 struct ix86_address parts;
25743 rtx base, index, disp;
/* Auto-inc/dec forms encode no explicit address bytes.  */
25747 if (GET_CODE (addr) == PRE_DEC
25748 || GET_CODE (addr) == POST_INC
25749 || GET_CODE (addr) == PRE_MODIFY
25750 || GET_CODE (addr) == POST_MODIFY)
25753 ok = ix86_decompose_address (addr, &parts);
25756 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25758 /* If this is not LEA instruction, add the length of addr32 prefix. */
25759 if (TARGET_64BIT && !lea
25760 && (SImode_address_operand (addr, VOIDmode)
25761 || (parts.base && GET_MODE (parts.base) == SImode)
25762 || (parts.index && GET_MODE (parts.index) == SImode)))
25766 index = parts.index;
/* Strip SUBREGs so the REGNO checks below see the hard/pseudo reg.  */
25769 if (base && GET_CODE (base) == SUBREG)
25770 base = SUBREG_REG (base);
25771 if (index && GET_CODE (index) == SUBREG)
25772 index = SUBREG_REG (index);
25774 gcc_assert (base == NULL_RTX || REG_P (base));
25775 gcc_assert (index == NULL_RTX || REG_P (index));
25778 - esp as the base always wants an index,
25779 - ebp as the base always wants a displacement,
25780 - r12 as the base always wants an index,
25781 - r13 as the base always wants a displacement. */
25783 /* Register Indirect. */
25784 if (base && !index && !disp)
25786 /* esp (for its index) and ebp (for its displacement) need
25787 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25789 if (base == arg_pointer_rtx
25790 || base == frame_pointer_rtx
25791 || REGNO (base) == SP_REG
25792 || REGNO (base) == BP_REG
25793 || REGNO (base) == R12_REG
25794 || REGNO (base) == R13_REG)
25798 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25799 is not disp32, but disp32(%rip), so for disp32
25800 SIB byte is needed, unless print_operand_address
25801 optimizes it into disp32(%rip) or (%rip) is implied
25803 else if (disp && !base && !index)
25806 if (rip_relative_addr_p (&parts))
25811 /* Find the length of the displacement constant. */
/* satisfies_constraint_K = signed 8-bit, i.e. disp8 encoding.  */
25814 if (base && satisfies_constraint_K (disp))
25819 /* ebp always wants a displacement. Similarly r13. */
25820 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25823 /* An index requires the two-byte modrm form.... */
25825 /* ...like esp (or r12), which always wants an index. */
25826 || base == arg_pointer_rtx
25827 || base == frame_pointer_rtx
25828 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
/* NOTE(review): extraction fragment; code unchanged, comments only.
   Default value of the "length_immediate" insn attribute: scan the
   cached operands for a constant and size it by the insn's mode
   attribute.  With SHORTFORM, constants that fit in a sign-extended
   8-bit immediate count as 1 byte.  */
25835 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25836 is set, expect that insn have 8bit immediate alternative. */
25838 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25842 extract_insn_cached (insn);
25843 for (i = recog_data.n_operands - 1; i >= 0; --i)
25844 if (CONSTANT_P (recog_data.operand[i]))
25846 enum attr_mode mode = get_attr_mode (insn);
25849 if (shortform && CONST_INT_P (recog_data.operand[i]))
25851 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
/* Truncate to the operand width before the disp8 range test, since
   e.g. 0xffff in HImode is really -1.  */
25858 ival = trunc_int_for_mode (ival, HImode);
25861 ival = trunc_int_for_mode (ival, SImode);
25866 if (IN_RANGE (ival, -128, 127))
25883 /* Immediates for DImode instructions are encoded
25884 as 32bit sign extended values. */
25889 fatal_insn ("unknown insn mode", insn);
/* NOTE(review): extraction fragment; code unchanged, comments only.
   Default value of the "length_address" attribute: LEA insns measure
   their SET_SRC address directly; otherwise find the first MEM operand
   that the matched alternative does not ignore ('X' constraint) and
   measure its address.  */
25895 /* Compute default value for "length_address" attribute. */
25897 ix86_attr_length_address_default (rtx_insn *insn)
25901 if (get_attr_type (insn) == TYPE_LEA)
25903 rtx set = PATTERN (insn), addr;
25905 if (GET_CODE (set) == PARALLEL)
25906 set = XVECEXP (set, 0, 0);
25908 gcc_assert (GET_CODE (set) == SET);
25910 addr = SET_SRC (set);
25912 return memory_address_length (addr, true);
25915 extract_insn_cached (insn);
25916 for (i = recog_data.n_operands - 1; i >= 0; --i)
25917 if (MEM_P (recog_data.operand[i]))
25919 constrain_operands_cached (insn, reload_completed);
25920 if (which_alternative != -1)
/* Walk the constraint string to the chosen alternative.  */
25922 const char *constraints = recog_data.constraints[i];
25923 int alt = which_alternative;
25925 while (*constraints == '=' || *constraints == '+')
25928 while (*constraints++ != ',')
25930 /* Skip ignored operands. */
25931 if (*constraints == 'X')
25934 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
/* NOTE(review): extraction fragment; code unchanged, comments only.
   Default for the "length_vex" attribute (VEX prefix + opcode byte):
   3-byte VEX is required for non-0f opcodes, VEX.W, REX.W-class DImode
   GPR operands, or extended (r8-r15/xmm8-15) regs in a memory address;
   otherwise the 2-byte form suffices.  */
25939 /* Compute default value for "length_vex" attribute. It includes
25940 2 or 3 byte VEX prefix and 1 opcode byte. */
25943 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
25948 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
25949 byte VEX prefix. */
25950 if (!has_0f_opcode || has_vex_w)
25953 /* We can always use 2 byte VEX prefix in 32bit. */
25957 extract_insn_cached (insn);
25959 for (i = recog_data.n_operands - 1; i >= 0; --i)
25960 if (REG_P (recog_data.operand[i]))
25962 /* REX.W bit uses 3 byte VEX prefix. */
25963 if (GET_MODE (recog_data.operand[i]) == DImode
25964 && GENERAL_REG_P (recog_data.operand[i]))
25969 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25970 if (MEM_P (recog_data.operand[i])
25971 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
/* NOTE(review): extraction fragment -- the per-group return values fell
   in the missing lines.  Code unchanged; comments only.
   TARGET_SCHED_ISSUE_RATE hook: instructions issued per cycle, grouped
   by microarchitecture generation.  */
25978 /* Return the maximum number of instructions a cpu can issue. */
25981 ix86_issue_rate (void)
25985 case PROCESSOR_PENTIUM:
25986 case PROCESSOR_BONNELL:
25987 case PROCESSOR_SILVERMONT:
25988 case PROCESSOR_KNL:
25989 case PROCESSOR_INTEL:
25991 case PROCESSOR_BTVER2:
25992 case PROCESSOR_PENTIUM4:
25993 case PROCESSOR_NOCONA:
25996 case PROCESSOR_PENTIUMPRO:
25997 case PROCESSOR_ATHLON:
25999 case PROCESSOR_AMDFAM10:
26000 case PROCESSOR_GENERIC:
26001 case PROCESSOR_BTVER1:
26004 case PROCESSOR_BDVER1:
26005 case PROCESSOR_BDVER2:
26006 case PROCESSOR_BDVER3:
26007 case PROCESSOR_BDVER4:
26008 case PROCESSOR_CORE2:
26009 case PROCESSOR_NEHALEM:
26010 case PROCESSOR_SANDYBRIDGE:
26011 case PROCESSOR_HASWELL:
26019 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
26020 by DEP_INSN and nothing set by DEP_INSN. */
26023 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
26027 /* Simplify the test for uninteresting insns. */
26028 if (insn_type != TYPE_SETCC
26029 && insn_type != TYPE_ICMOV
26030 && insn_type != TYPE_FCMOV
26031 && insn_type != TYPE_IBR)
26034 if ((set = single_set (dep_insn)) != 0)
26036 set = SET_DEST (set);
26039 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
26040 && XVECLEN (PATTERN (dep_insn), 0) == 2
26041 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
26042 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
26044 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
26045 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
26050 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
26053 /* This test is true if the dependent insn reads the flags but
26054 not any other potentially set register. */
26055 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
26058 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
/* NOTE(review): extraction fragment; code unchanged, comments only.
   Address Generation Interlock test: true if any MEM operand of
   USE_INSN has an address modified by SET_INSN (only the first MEM
   found is considered).  */
26064 /* Return true iff USE_INSN has a memory address with operands set by
26068 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
26071 extract_insn_cached (use_insn);
26072 for (i = recog_data.n_operands - 1; i >= 0; --i)
26073 if (MEM_P (recog_data.operand[i]))
26075 rtx addr = XEXP (recog_data.operand[i], 0);
26076 return modified_in_p (addr, set_insn) != 0;
/* NOTE(review): extraction fragment; code unchanged, comments only.
   Recursive rtx walk: true if ADDR occurs (rtx_equal_p) anywhere inside
   INSN's expression tree, descending through 'e' and 'E' format slots.  */
26081 /* Helper function for exact_store_load_dependency.
26082 Return true if addr is found in insn. */
26084 exact_dependency_1 (rtx addr, rtx insn)
26086 enum rtx_code code;
26087 const char *format_ptr;
26090 code = GET_CODE (insn);
26094 if (rtx_equal_p (addr, insn))
26109 format_ptr = GET_RTX_FORMAT (code);
26110 for (i = 0; i < GET_RTX_LENGTH (code); i++)
26112 switch (*format_ptr++)
26115 if (exact_dependency_1 (addr, XEXP (insn, i)))
26119 for (j = 0; j < XVECLEN (insn, i); j++)
26120 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
/* NOTE(review): extraction fragment; code unchanged, comments only.
   True when STORE's destination MEM appears inside LOAD's source --
   i.e. an exact store-forwarding pair on the same address.  */
26128 /* Return true if there exists exact dependency for store & load, i.e.
26129 the same memory address is used in them. */
26131 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
26135 set1 = single_set (store);
26138 if (!MEM_P (SET_DEST (set1)))
26140 set2 = single_set (load);
26143 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
/* NOTE(review): extraction fragment -- interior lines (returns, braces,
   switch head) are missing.  Code unchanged; comments only.

   TARGET_SCHED_ADJUST_COST hook: tweak the scheduler's latency COST of
   the LINK dependence between producer DEP_INSN and consumer INSN,
   per-microarchitecture.  */
26149 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26151 enum attr_type insn_type, dep_insn_type;
26152 enum attr_memory memory;
26154 int dep_insn_code_number;
26156 /* Anti and output dependencies have zero cost on all CPUs. */
26157 if (REG_NOTE_KIND (link) != 0)
26160 dep_insn_code_number = recog_memoized (dep_insn);
26162 /* If we can't recognize the insns, we can't really do anything. */
26163 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26166 insn_type = get_attr_type (insn);
26167 dep_insn_type = get_attr_type (dep_insn);
26171 case PROCESSOR_PENTIUM:
26172 /* Address Generation Interlock adds a cycle of latency. */
26173 if (insn_type == TYPE_LEA)
26175 rtx addr = PATTERN (insn);
26177 if (GET_CODE (addr) == PARALLEL)
26178 addr = XVECEXP (addr, 0, 0);
26180 gcc_assert (GET_CODE (addr) == SET);
26182 addr = SET_SRC (addr);
26183 if (modified_in_p (addr, dep_insn))
26186 else if (ix86_agi_dependent (dep_insn, insn))
26189 /* ??? Compares pair with jump/setcc. */
26190 if (ix86_flags_dependent (insn, dep_insn, insn_type))
26193 /* Floating point stores require value to be ready one cycle earlier. */
26194 if (insn_type == TYPE_FMOV
26195 && get_attr_memory (insn) == MEMORY_STORE
26196 && !ix86_agi_dependent (dep_insn, insn))
26200 case PROCESSOR_PENTIUMPRO:
26201 /* INT->FP conversion is expensive. */
26202 if (get_attr_fp_int_src (dep_insn))
26205 /* There is one cycle extra latency between an FP op and a store. */
26206 if (insn_type == TYPE_FMOV
26207 && (set = single_set (dep_insn)) != NULL_RTX
26208 && (set2 = single_set (insn)) != NULL_RTX
26209 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26210 && MEM_P (SET_DEST (set2)))
26213 memory = get_attr_memory (insn);
26215 /* Show ability of reorder buffer to hide latency of load by executing
26216 in parallel with previous instruction in case
26217 previous instruction is not needed to compute the address. */
26218 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26219 && !ix86_agi_dependent (dep_insn, insn))
26221 /* Claim moves to take one cycle, as core can issue one load
26222 at time and the next load can start cycle later. */
26223 if (dep_insn_type == TYPE_IMOV
26224 || dep_insn_type == TYPE_FMOV)
/* K6-class handling (case label fell in the missing lines).  */
26232 /* The esp dependency is resolved before
26233 the instruction is really finished. */
26234 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26235 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26238 /* INT->FP conversion is expensive. */
26239 if (get_attr_fp_int_src (dep_insn))
26242 memory = get_attr_memory (insn);
26244 /* Show ability of reorder buffer to hide latency of load by executing
26245 in parallel with previous instruction in case
26246 previous instruction is not needed to compute the address. */
26247 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26248 && !ix86_agi_dependent (dep_insn, insn))
26250 /* Claim moves to take one cycle, as core can issue one load
26251 at time and the next load can start cycle later. */
26252 if (dep_insn_type == TYPE_IMOV
26253 || dep_insn_type == TYPE_FMOV)
26262 case PROCESSOR_AMDFAM10:
26263 case PROCESSOR_BDVER1:
26264 case PROCESSOR_BDVER2:
26265 case PROCESSOR_BDVER3:
26266 case PROCESSOR_BDVER4:
26267 case PROCESSOR_BTVER1:
26268 case PROCESSOR_BTVER2:
26269 case PROCESSOR_GENERIC:
26270 /* Stack engine allows to execute push&pop instructions in parall. */
26271 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26272 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26276 case PROCESSOR_ATHLON:
26278 memory = get_attr_memory (insn);
26280 /* Show ability of reorder buffer to hide latency of load by executing
26281 in parallel with previous instruction in case
26282 previous instruction is not needed to compute the address. */
26283 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26284 && !ix86_agi_dependent (dep_insn, insn))
26286 enum attr_unit unit = get_attr_unit (insn);
26289 /* Because of the difference between the length of integer and
26290 floating unit pipeline preparation stages, the memory operands
26291 for floating point are cheaper.
26293 ??? For Athlon it the difference is most probably 2. */
26294 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26297 loadcost = TARGET_ATHLON ? 2 : 0;
26299 if (cost >= loadcost)
26306 case PROCESSOR_CORE2:
26307 case PROCESSOR_NEHALEM:
26308 case PROCESSOR_SANDYBRIDGE:
26309 case PROCESSOR_HASWELL:
26310 /* Stack engine allows to execute push&pop instructions in parall. */
26311 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26312 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26315 memory = get_attr_memory (insn);
26317 /* Show ability of reorder buffer to hide latency of load by executing
26318 in parallel with previous instruction in case
26319 previous instruction is not needed to compute the address. */
26320 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26321 && !ix86_agi_dependent (dep_insn, insn))
26330 case PROCESSOR_SILVERMONT:
26331 case PROCESSOR_KNL:
26332 case PROCESSOR_INTEL:
26333 if (!reload_completed)
26336 /* Increase cost of integer loads. */
26337 memory = get_attr_memory (dep_insn);
26338 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26340 enum attr_unit unit = get_attr_unit (dep_insn);
26341 if (unit == UNIT_INTEGER && cost == 1)
26343 if (memory == MEMORY_LOAD)
26347 /* Increase cost of ld/st for short int types only
26348 because of store forwarding issue. */
26349 rtx set = single_set (dep_insn);
26350 if (set && (GET_MODE (SET_DEST (set)) == QImode
26351 || GET_MODE (SET_DEST (set)) == HImode))
26353 /* Increase cost of store/load insn if exact
26354 dependence exists and it is load insn. */
26355 enum attr_memory insn_memory = get_attr_memory (insn);
26356 if (insn_memory == MEMORY_LOAD
26357 && exact_store_load_dependency (dep_insn, insn))
/* NOTE(review): extraction fragment -- the per-case return constants
   fell in the missing lines.  Code unchanged; comments only.
   TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD hook.  */
26371 /* How many alternative schedules to try. This should be as wide as the
26372 scheduling freedom in the DFA, but no wider. Making this value too
26373 large results extra work for the scheduler. */
26376 ia32_multipass_dfa_lookahead (void)
26380 case PROCESSOR_PENTIUM:
26383 case PROCESSOR_PENTIUMPRO:
26387 case PROCESSOR_BDVER1:
26388 case PROCESSOR_BDVER2:
26389 case PROCESSOR_BDVER3:
26390 case PROCESSOR_BDVER4:
26391 /* We use lookahead value 4 for BD both before and after reload
26392 schedules. Plan is to have value 8 included for O3. */
26395 case PROCESSOR_CORE2:
26396 case PROCESSOR_NEHALEM:
26397 case PROCESSOR_SANDYBRIDGE:
26398 case PROCESSOR_HASWELL:
26399 case PROCESSOR_BONNELL:
26400 case PROCESSOR_SILVERMONT:
26401 case PROCESSOR_KNL:
26402 case PROCESSOR_INTEL:
26403 /* Generally, we want haifa-sched:max_issue() to look ahead as far
26404 as many instructions can be executed on a cycle, i.e.,
26405 issue_rate. I wonder why tuning for many CPUs does not do this. */
26406 if (reload_completed)
26407 return ix86_issue_rate ();
26408 /* Don't use lookahead for pre-reload schedule to save compile time. */
/* NOTE(review): extraction fragment; code unchanged, comments only.
   TARGET_SCHED_MACRO_FUSION_P hook: fusion is available exactly when
   the active tuning sets TARGET_FUSE_CMP_AND_BRANCH.  */
26416 /* Return true if target platform supports macro-fusion. */
26419 ix86_macro_fusion_p ()
26421 return TARGET_FUSE_CMP_AND_BRANCH;
/* NOTE(review): extraction fragment -- interior lines missing.  Code
   unchanged; comments only.

   TARGET_SCHED_MACRO_FUSION_PAIR_P hook: decide whether the
   flag-producing CONDGEN (test/cmp/inc/dec/alu) may be kept adjacent to
   CONDJMP so the hardware can fuse them into one micro-op.  Rejects
   mem-imm compares, RIP-relative operands, and jcc condition codes the
   tuning says don't fuse.  */
26424 /* Check whether current microarchitecture support macro fusion
26425 for insn pair "CONDGEN + CONDJMP". Refer to
26426 "Intel Architectures Optimization Reference Manual". */
26429 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26432 enum rtx_code ccode;
26433 rtx compare_set = NULL_RTX, test_if, cond;
26434 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26436 if (!any_condjump_p (condjmp))
26439 if (get_attr_type (condgen) != TYPE_TEST
26440 && get_attr_type (condgen) != TYPE_ICMP
26441 && get_attr_type (condgen) != TYPE_INCDEC
26442 && get_attr_type (condgen) != TYPE_ALU)
26445 compare_set = single_set (condgen);
26446 if (compare_set == NULL_RTX
26447 && !TARGET_FUSE_ALU_AND_BRANCH)
/* Multi-SET condgen (alu op + flags): pick the COMPARE SET as the
   flags producer, the other SET as the alu result.  */
26450 if (compare_set == NULL_RTX)
26453 rtx pat = PATTERN (condgen);
26454 for (i = 0; i < XVECLEN (pat, 0); i++)
26455 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26457 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26458 if (GET_CODE (set_src) == COMPARE)
26459 compare_set = XVECEXP (pat, 0, i);
26461 alu_set = XVECEXP (pat, 0, i);
26464 if (compare_set == NULL_RTX)
26466 src = SET_SRC (compare_set);
26467 if (GET_CODE (src) != COMPARE)
26470 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26472 if ((MEM_P (XEXP (src, 0))
26473 && CONST_INT_P (XEXP (src, 1)))
26474 || (MEM_P (XEXP (src, 1))
26475 && CONST_INT_P (XEXP (src, 0))))
26478 /* No fusion for RIP-relative address. */
26479 if (MEM_P (XEXP (src, 0)))
26480 addr = XEXP (XEXP (src, 0), 0);
26481 else if (MEM_P (XEXP (src, 1)))
26482 addr = XEXP (XEXP (src, 1), 0);
26485 ix86_address parts;
26486 int ok = ix86_decompose_address (addr, &parts);
26489 if (rip_relative_addr_p (&parts))
26493 test_if = SET_SRC (pc_set (condjmp));
26494 cond = XEXP (test_if, 0);
26495 ccode = GET_CODE (cond);
26496 /* Check whether conditional jump use Sign or Overflow Flags. */
26497 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26504 /* Return true for TYPE_TEST and TYPE_ICMP. */
26505 if (get_attr_type (condgen) == TYPE_TEST
26506 || get_attr_type (condgen) == TYPE_ICMP)
26509 /* The following is the case that macro-fusion for alu + jmp. */
26510 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26513 /* No fusion for alu op with memory destination operand. */
26514 dest = SET_DEST (alu_set);
26518 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26520 if (get_attr_type (condgen) == TYPE_INCDEC
/* NOTE(review): extraction fragment -- interior lines missing.  Code
   unchanged; comments only.

   Bonnell (Atom) ready-list heuristic: if an SImode IMUL is about to
   issue, look for the unique producer of another, independent IMUL and
   return its index so it can be moved up (keeping the IMUL pipeline
   busy).  Returns -1 when no suitable producer exists.  */
26530 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
26531 execution. It is applied if
26532 (1) IMUL instruction is on the top of list;
26533 (2) There exists the only producer of independent IMUL instruction in
26535 Return index of IMUL producer if it was found and -1 otherwise. */
26537 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26540 rtx set, insn1, insn2;
26541 sd_iterator_def sd_it;
26546 if (!TARGET_BONNELL)
26549 /* Check that IMUL instruction is on the top of ready list. */
26550 insn = ready[n_ready - 1];
26551 set = single_set (insn);
26554 if (!(GET_CODE (SET_SRC (set)) == MULT
26555 && GET_MODE (SET_SRC (set)) == SImode))
26558 /* Search for producer of independent IMUL instruction. */
26559 for (i = n_ready - 2; i >= 0; i--)
26562 if (!NONDEBUG_INSN_P (insn))
26564 /* Skip IMUL instruction. */
26565 insn2 = PATTERN (insn);
26566 if (GET_CODE (insn2) == PARALLEL)
26567 insn2 = XVECEXP (insn2, 0, 0);
26568 if (GET_CODE (insn2) == SET
26569 && GET_CODE (SET_SRC (insn2)) == MULT
26570 && GET_MODE (SET_SRC (insn2)) == SImode)
/* Walk forward deps of the candidate: does it feed an IMUL?  */
26573 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26576 con = DEP_CON (dep);
26577 if (!NONDEBUG_INSN_P (con))
26579 insn1 = PATTERN (con);
26580 if (GET_CODE (insn1) == PARALLEL)
26581 insn1 = XVECEXP (insn1, 0, 0);
26583 if (GET_CODE (insn1) == SET
26584 && GET_CODE (SET_SRC (insn1)) == MULT
26585 && GET_MODE (SET_SRC (insn1)) == SImode)
26587 sd_iterator_def sd_it1;
26589 /* Check if there is no other dependee for IMUL. */
26591 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26594 pro = DEP_PRO (dep1);
26595 if (!NONDEBUG_INSN_P (pro))
26610 /* Try to find the best candidate on the top of ready list if two insns
26611 have the same priority - candidate is best if its dependees were
26612 scheduled earlier. Applied for Silvermont only.
26613 Return true if top 2 insns must be interchanged. */
26615 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26617 rtx_insn *top = ready[n_ready - 1];
26618 rtx_insn *next = ready[n_ready - 2];
26620 sd_iterator_def sd_it;
26624 #define INSN_TICK(INSN) (HID (INSN)->tick)
26626 if (!TARGET_SILVERMONT && !TARGET_INTEL)
/* Both candidates must be real (non-debug) non-jump instructions with a
   single SET; the elided lines presumably bail out otherwise.  */
26629 if (!NONDEBUG_INSN_P (top))
26631 if (!NONJUMP_INSN_P (top))
26633 if (!NONDEBUG_INSN_P (next))
26635 if (!NONJUMP_INSN_P (next))
26637 set = single_set (top);
26640 set = single_set (next);
26644 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26646 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26648 /* Determine winner more precisely.  clock1/clock2 collect the latest
   tick over resolved backward dependences of TOP and NEXT respectively:
   a smaller value means the insn's producers finished earlier.  */
26649 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26652 pro = DEP_PRO (dep);
26653 if (!NONDEBUG_INSN_P (pro))
26655 if (INSN_TICK (pro) > clock1)
26656 clock1 = INSN_TICK (pro);
26658 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26661 pro = DEP_PRO (dep);
26662 if (!NONDEBUG_INSN_P (pro))
26664 if (INSN_TICK (pro) > clock2)
26665 clock2 = INSN_TICK (pro);
26668 if (clock1 == clock2)
26670 /* Determine winner - load must win. */
26671 enum attr_memory memory1, memory2;
26672 memory1 = get_attr_memory (top);
26673 memory2 = get_attr_memory (next);
26674 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
/* NEXT's producers finished strictly earlier than TOP's: swap them.  */
26677 return (bool) (clock2 < clock1);
26683 /* Perform possible reordering of ready list for Atom/Silvermont only.
26684 Return issue rate. */
26686 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26687 int *pn_ready, int clock_var)
26689 int issue_rate = -1;
26690 int n_ready = *pn_ready;
26695 /* Set up issue rate. */
26696 issue_rate = ix86_issue_rate ();
26698 /* Do reordering for BONNELL/SILVERMONT only. */
26699 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26702 /* Nothing to do if ready list contains only 1 instruction. */
26706 /* Do reordering for post-reload scheduler only. */
26707 if (!reload_completed)
/* First preference: Bonnell IMUL pipelining.  do_reorder_for_imul
   returns the index of the IMUL producer, or -1 if none was found.  */
26710 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26712 if (sched_verbose > 1)
26713 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26714 INSN_UID (ready[index]));
26716 /* Put IMUL producer (ready[index]) at the top of ready list. */
26717 insn = ready[index];
26718 for (i = index; i < n_ready - 1; i++)
26719 ready[i] = ready[i + 1];
26720 ready[n_ready - 1] = insn;
/* Otherwise, on Silvermont/Intel, consider swapping the two top insns
   (skipped on the very first cycle, clock_var == 0).  */
26723 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26725 if (sched_verbose > 1)
26726 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26727 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26728 /* Swap 2 top elements of ready list. */
26729 insn = ready[n_ready - 1];
26730 ready[n_ready - 1] = ready[n_ready - 2];
26731 ready[n_ready - 2] = insn;
26737 ix86_class_likely_spilled_p (reg_class_t);
26739 /* Returns true if lhs of insn is HW function argument register and set up
26740 is_spilled to true if it is likely spilled HW register. */
26742 insn_is_function_arg (rtx insn, bool* is_spilled)
26746 if (!NONDEBUG_INSN_P (insn))
26748 /* Call instructions are not movable, ignore it. */
/* Look through a PARALLEL (e.g. a set with clobbers) to the first SET.  */
26751 insn = PATTERN (insn);
26752 if (GET_CODE (insn) == PARALLEL)
26753 insn = XVECEXP (insn, 0, 0);
26754 if (GET_CODE (insn) != SET)
26756 dst = SET_DEST (insn);
26757 if (REG_P (dst) && HARD_REGISTER_P (dst)
26758 && ix86_function_arg_regno_p (REGNO (dst)))
26760 /* Is it likely spilled HW register? */
26761 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26762 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26763 *is_spilled = true;
26769 /* Add output dependencies for chain of function adjacent arguments if only
26770 there is a move to likely spilled HW register. Return first argument
26771 if at least one dependence was added or NULL otherwise. */
26773 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26776 rtx_insn *last = call;
26777 rtx_insn *first_arg = NULL;
26778 bool is_spilled = false;
26780 head = PREV_INSN (head);
26782 /* Find nearest to call argument passing instruction.  Scan backwards
   from the CALL until the first insn that sets a HW argument register.  */
26785 last = PREV_INSN (last);
26788 if (!NONDEBUG_INSN_P (last))
26790 if (insn_is_function_arg (last, &is_spilled))
/* Continue walking backwards, chaining together adjacent argument-
   setting insns with output dependences.  */
26798 insn = PREV_INSN (last);
26799 if (!INSN_P (insn))
26803 if (!NONDEBUG_INSN_P (insn))
26808 if (insn_is_function_arg (insn, &is_spilled))
26810 /* Add output dependence between two function arguments if chain
26811 of output arguments contains likely spilled HW registers. */
26813 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26814 first_arg = last = insn;
26824 /* Add output or anti dependency from insn to first_arg to restrict its code
26827 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26832 set = single_set (insn);
26835 tmp = SET_DEST (set);
/* The choice between output and anti dependence presumably hinges on what
   INSN writes (elided condition) — TODO confirm against full source.  */
26838 /* Add output dependency to the first function argument. */
26839 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26842 /* Add anti dependency. */
26843 add_dependence (first_arg, insn, REG_DEP_ANTI);
26846 /* Avoid cross block motion of function argument through adding dependency
26847 from the first non-jump instruction in bb. */
26849 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
/* Walk BB backwards from its end; for the first suitable single-set,
   non-debug, non-jump insn, pin ARG behind it.  */
26851 rtx_insn *insn = BB_END (bb);
26855 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26857 rtx set = single_set (insn);
26860 avoid_func_arg_motion (arg, insn);
26864 if (insn == BB_HEAD (bb))
26866 insn = PREV_INSN (insn);
26870 /* Hook for pre-reload schedule - avoid motion of function arguments
26871 passed in likely spilled HW registers. */
26873 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26876 rtx_insn *first_arg = NULL;
/* Only relevant for the pre-reload scheduler; after reload the argument
   moves are fixed.  */
26877 if (reload_completed)
26879 while (head != tail && DEBUG_INSN_P (head))
26880 head = NEXT_INSN (head);
26881 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26882 if (INSN_P (insn) && CALL_P (insn))
26884 first_arg = add_parameter_dependencies (insn, head);
26887 /* Add dependee for first argument to predecessors if only
26888 region contains more than one block. */
26889 basic_block bb = BLOCK_FOR_INSN (insn);
26890 int rgn = CONTAINING_RGN (bb->index);
26891 int nr_blks = RGN_NR_BLOCKS (rgn);
26892 /* Skip trivial regions and region head blocks that can have
26893 predecessors outside of region. */
26894 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26899 /* Regions are SCCs with the exception of selective
26900 scheduling with pipelining of outer blocks enabled.
26901 So also check that immediate predecessors of a non-head
26902 block are in the same region. */
26903 FOR_EACH_EDGE (e, ei, bb->preds)
26905 /* Avoid creating of loop-carried dependencies through
26906 using topological ordering in the region. */
26907 if (rgn == CONTAINING_RGN (e->src->index)
26908 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26909 add_dependee_for_func_arg (first_arg, e->src);
/* A non-call insn between the call and its argument chain must not be
   reordered past the first argument.  */
26917 else if (first_arg)
26918 avoid_func_arg_motion (first_arg, insn);
26921 /* Hook for pre-reload schedule - set priority of moves from likely spilled
26922 HW registers to maximum, to schedule them as soon as possible. These are
26923 moves from function argument registers at the top of the function entry
26924 and moves from function return value registers after call. */
26926 ix86_adjust_priority (rtx_insn *insn, int priority)
/* Pre-reload only; after reload return the priority unchanged
   (body of the early return is elided in this excerpt).  */
26930 if (reload_completed)
26933 if (!NONDEBUG_INSN_P (insn))
26936 set = single_set (insn);
26939 rtx tmp = SET_SRC (set);
26941 && HARD_REGISTER_P (tmp)
26942 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26943 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26944 return current_sched_info->sched_max_insns_priority;
26950 /* Model decoder of Core 2/i7.
26951 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
26952 track the instruction fetch block boundaries and make sure that long
26953 (9+ bytes) instructions are assigned to D0. */
26955 /* Maximum length of an insn that can be handled by
26956 a secondary decoder unit. '8' for Core 2/i7. */
26957 static int core2i7_secondary_decoder_max_insn_size;
26959 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26960 '16' for Core 2/i7. */
26961 static int core2i7_ifetch_block_size;
26963 /* Maximum number of instructions decoder can handle per cycle.
26964 '6' for Core 2/i7. */
26965 static int core2i7_ifetch_block_max_insns;
/* These three parameters are assigned in ix86_sched_init_global when the
   multipass hooks are installed for Core2/Nehalem/Sandybridge/Haswell.  */
26967 typedef struct ix86_first_cycle_multipass_data_ *
26968 ix86_first_cycle_multipass_data_t;
26969 typedef const struct ix86_first_cycle_multipass_data_ *
26970 const_ix86_first_cycle_multipass_data_t;
26972 /* A variable to store target state across calls to max_issue within
26974 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26975 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26977 /* Initialize DATA: reset the ifetch-block counters and mark the
   ready_try change bitmap as unallocated (lazily created in the
   issue hook).  */
26979 core2i7_first_cycle_multipass_init (void *_data)
26981 ix86_first_cycle_multipass_data_t data
26982 = (ix86_first_cycle_multipass_data_t) _data;
26984 data->ifetch_block_len = 0;
26985 data->ifetch_block_n_insns = 0;
26986 data->ready_try_change = NULL;
26987 data->ready_try_change_size = 0;
26990 /* Advancing the cycle; reset ifetch block counts. */
26992 core2i7_dfa_post_advance_cycle (void)
26994 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
/* Sanity: we should never have accounted more insns than the decoder can
   handle in one cycle.  */
26996 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26998 data->ifetch_block_len = 0;
26999 data->ifetch_block_n_insns = 0;
27002 static int min_insn_size (rtx_insn *);
27004 /* Filter out insns from ready_try that the core will not be able to issue
27005 on current cycle due to decoder. */
27007 core2i7_first_cycle_multipass_filter_ready_try
27008 (const_ix86_first_cycle_multipass_data_t data,
27009 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
/* NOTE(review): the loop header over the ready list is elided from this
   excerpt; n_ready appears to serve as the per-iteration index below.  */
27016 if (ready_try[n_ready])
27019 insn = get_ready_element (n_ready);
27020 insn_size = min_insn_size (insn);
27022 if (/* If this is a too long an insn for a secondary decoder ... */
27023 (!first_cycle_insn_p
27024 && insn_size > core2i7_secondary_decoder_max_insn_size)
27025 /* ... or it would not fit into the ifetch block ... */
27026 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
27027 /* ... or the decoder is full already ... */
27028 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
27029 /* ... mask the insn out. */
27031 ready_try[n_ready] = 1;
/* Record the change so backtracking can revert it.  */
27033 if (data->ready_try_change)
27034 bitmap_set_bit (data->ready_try_change, n_ready);
27039 /* Prepare for a new round of multipass lookahead scheduling. */
27041 core2i7_first_cycle_multipass_begin (void *_data,
27042 signed char *ready_try, int n_ready,
27043 bool first_cycle_insn_p)
27045 ix86_first_cycle_multipass_data_t data
27046 = (ix86_first_cycle_multipass_data_t) _data;
27047 const_ix86_first_cycle_multipass_data_t prev_data
27048 = ix86_first_cycle_multipass_data;
27050 /* Restore the state from the end of the previous round. */
27051 data->ifetch_block_len = prev_data->ifetch_block_len;
27052 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
27054 /* Filter instructions that cannot be issued on current cycle due to
27055 decoder restrictions. */
27056 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27057 first_cycle_insn_p);
27060 /* INSN is being issued in current solution. Account for its impact on
27061 the decoder model. */
27063 core2i7_first_cycle_multipass_issue (void *_data,
27064 signed char *ready_try, int n_ready,
27065 rtx_insn *insn, const void *_prev_data)
27067 ix86_first_cycle_multipass_data_t data
27068 = (ix86_first_cycle_multipass_data_t) _data;
27069 const_ix86_first_cycle_multipass_data_t prev_data
27070 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
27072 int insn_size = min_insn_size (insn);
/* Charge INSN's bytes and one decoder slot against the ifetch block.  */
27074 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
27075 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
27076 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
27077 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27079 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
27080 if (!data->ready_try_change)
27082 data->ready_try_change = sbitmap_alloc (n_ready);
27083 data->ready_try_change_size = n_ready;
27085 else if (data->ready_try_change_size < n_ready)
27087 data->ready_try_change = sbitmap_resize (data->ready_try_change,
27089 data->ready_try_change_size = n_ready;
27091 bitmap_clear (data->ready_try_change);
27093 /* Filter out insns from ready_try that the core will not be able to issue
27094 on current cycle due to decoder. */
27095 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27099 /* Revert the effect on ready_try.  Every bit recorded in
   data->ready_try_change marks a ready-list slot that the filter masked
   out during the issue step; the (elided) loop body clears them back.  */
27101 core2i7_first_cycle_multipass_backtrack (const void *_data,
27102 signed char *ready_try,
27103 int n_ready ATTRIBUTE_UNUSED)
27105 const_ix86_first_cycle_multipass_data_t data
27106 = (const_ix86_first_cycle_multipass_data_t) _data;
27107 unsigned int i = 0;
27108 sbitmap_iterator sbi;
27110 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
27111 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
27117 /* Save the result of multipass lookahead scheduling for the next round. */
27119 core2i7_first_cycle_multipass_end (const void *_data)
27121 const_ix86_first_cycle_multipass_data_t data
27122 = (const_ix86_first_cycle_multipass_data_t) _data;
27123 ix86_first_cycle_multipass_data_t next_data
27124 = ix86_first_cycle_multipass_data;
/* Persist the ifetch-block accounting into the global state read by
   core2i7_first_cycle_multipass_begin on the next round.  */
27128 next_data->ifetch_block_len = data->ifetch_block_len;
27129 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
27133 /* Deallocate target data: release the lazily-allocated ready_try change
   bitmap and reset its bookkeeping so a later init/issue can reallocate.  */
27135 core2i7_first_cycle_multipass_fini (void *_data)
27137 ix86_first_cycle_multipass_data_t data
27138 = (ix86_first_cycle_multipass_data_t) _data;
27140 if (data->ready_try_change)
27142 sbitmap_free (data->ready_try_change);
27143 data->ready_try_change = NULL;
27144 data->ready_try_change_size = 0;
27148 /* Prepare for scheduling pass. */
27150 ix86_sched_init_global (FILE *, int, int)
27152 /* Install scheduling hooks for current CPU. Some of these hooks are used
27153 in time-critical parts of the scheduler, so we only set them up when
27154 they are actually used. */
27157 case PROCESSOR_CORE2:
27158 case PROCESSOR_NEHALEM:
27159 case PROCESSOR_SANDYBRIDGE:
27160 case PROCESSOR_HASWELL:
27161 /* Do not perform multipass scheduling for pre-reload schedule
27162 to save compile time. */
27163 if (reload_completed)
27165 targetm.sched.dfa_post_advance_cycle
27166 = core2i7_dfa_post_advance_cycle;
27167 targetm.sched.first_cycle_multipass_init
27168 = core2i7_first_cycle_multipass_init;
27169 targetm.sched.first_cycle_multipass_begin
27170 = core2i7_first_cycle_multipass_begin;
27171 targetm.sched.first_cycle_multipass_issue
27172 = core2i7_first_cycle_multipass_issue;
27173 targetm.sched.first_cycle_multipass_backtrack
27174 = core2i7_first_cycle_multipass_backtrack;
27175 targetm.sched.first_cycle_multipass_end
27176 = core2i7_first_cycle_multipass_end;
27177 targetm.sched.first_cycle_multipass_fini
27178 = core2i7_first_cycle_multipass_fini;
27180 /* Set decoder parameters. */
27181 core2i7_secondary_decoder_max_insn_size = 8;
27182 core2i7_ifetch_block_size = 16;
27183 core2i7_ifetch_block_max_insns = 6;
27186 /* ... Fall through ... */
/* Default: clear all multipass hooks so other CPUs pay no overhead.  */
27188 targetm.sched.dfa_post_advance_cycle = NULL;
27189 targetm.sched.first_cycle_multipass_init = NULL;
27190 targetm.sched.first_cycle_multipass_begin = NULL;
27191 targetm.sched.first_cycle_multipass_issue = NULL;
27192 targetm.sched.first_cycle_multipass_backtrack = NULL;
27193 targetm.sched.first_cycle_multipass_end = NULL;
27194 targetm.sched.first_cycle_multipass_fini = NULL;
27200 /* Compute the alignment given to a constant that is being placed in memory.
27201 EXP is the constant and ALIGN is the alignment that the object would
27203 The value of this function is used instead of that alignment to align
27207 ix86_constant_alignment (tree exp, int align)
27209 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27210 || TREE_CODE (exp) == INTEGER_CST)
/* Bump doubles to 64-bit and 128-bit-mode constants to 128-bit alignment
   (return statements elided in this excerpt).  */
27212 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27214 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
/* Long string constants get word alignment to speed up block copies.  */
27217 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27218 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27219 return BITS_PER_WORD;
27224 /* Compute the alignment for a static variable.
27225 TYPE is the data type, and ALIGN is the alignment that
27226 the object would ordinarily have. The value of this function is used
27227 instead of that alignment to align the object. */
27230 ix86_data_alignment (tree type, int align, bool opt)
27232 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27233 for symbols from other compilation units or symbols that don't need
27234 to bind locally. In order to preserve some ABI compatibility with
27235 those compilers, ensure we don't decrease alignment from what we
27238 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
27240 /* A data structure, equal or greater than the size of a cache line
27241 (64 bytes in the Pentium 4 and other recent Intel processors, including
27242 processors based on Intel Core microarchitecture) should be aligned
27243 so that its base address is a multiple of a cache line size. */
27246 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
27248 if (max_align < BITS_PER_WORD)
27249 max_align = BITS_PER_WORD;
/* -malign-data= selects between ABI-only, GCC-4.8-compatible and
   cacheline-aware alignment policies.  */
27251 switch (ix86_align_data_type)
27253 case ix86_align_data_type_abi: opt = false; break;
27254 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
27255 case ix86_align_data_type_cacheline: break;
27259 && AGGREGATE_TYPE_P (type)
27260 && TYPE_SIZE (type)
27261 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27263 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27264 && align < max_align_compat)
27265 align = max_align_compat;
27266 if (wi::geu_p (TYPE_SIZE (type), max_align)
27267 && align < max_align)
27271 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
27272 to 16byte boundary. */
27275 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27276 && TYPE_SIZE (type)
27277 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27278 && wi::geu_p (TYPE_SIZE (type), 128)
/* Per-type refinements: raise alignment for 64-bit and 128-bit element
   modes (the return statements are elided in this excerpt).  */
27286 if (TREE_CODE (type) == ARRAY_TYPE)
27288 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27290 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27293 else if (TREE_CODE (type) == COMPLEX_TYPE)
27296 if (TYPE_MODE (type) == DCmode && align < 64)
27298 if ((TYPE_MODE (type) == XCmode
27299 || TYPE_MODE (type) == TCmode) && align < 128)
27302 else if ((TREE_CODE (type) == RECORD_TYPE
27303 || TREE_CODE (type) == UNION_TYPE
27304 || TREE_CODE (type) == QUAL_UNION_TYPE)
27305 && TYPE_FIELDS (type))
27307 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27309 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27312 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27313 || TREE_CODE (type) == INTEGER_TYPE)
27315 if (TYPE_MODE (type) == DFmode && align < 64)
27317 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27324 /* Compute the alignment for a local variable or a stack slot. EXP is
27325 the data type or decl itself, MODE is the widest mode available and
27326 ALIGN is the alignment that the object would ordinarily have. The
27327 value of this macro is used instead of that alignment to align the
27331 ix86_local_alignment (tree exp, machine_mode mode,
27332 unsigned int align)
27336 if (exp && DECL_P (exp))
27338 type = TREE_TYPE (exp);
27347 /* Don't do dynamic stack realignment for long long objects with
27348 -mpreferred-stack-boundary=2. */
27351 && ix86_preferred_stack_boundary < 64
27352 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27353 && (!type || !TYPE_USER_ALIGN (type))
27354 && (!decl || !DECL_USER_ALIGN (decl)))
27357 /* If TYPE is NULL, we are allocating a stack slot for caller-save
27358 register in MODE. We will return the largest alignment of XF
27362 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27363 align = GET_MODE_ALIGNMENT (DFmode);
27367 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
27368 to 16byte boundary. Exact wording is:
27370 An array uses the same alignment as its elements, except that a local or
27371 global array variable of length at least 16 bytes or
27372 a C99 variable-length array variable always has alignment of at least 16 bytes.
27374 This was added to allow use of aligned SSE instructions at arrays. This
27375 rule is meant for static storage (where compiler can not do the analysis
27376 by itself). We follow it for automatic variables only when convenient.
27377 We fully control everything in the function compiled and functions from
27378 other unit can not rely on the alignment.
27380 Exclude va_list type. It is the common case of local array where
27381 we can not benefit from the alignment.
27383 TODO: Probably one should optimize for size only when var is not escaping. */
27384 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27387 if (AGGREGATE_TYPE_P (type)
27388 && (va_list_type_node == NULL_TREE
27389 || (TYPE_MAIN_VARIANT (type)
27390 != TYPE_MAIN_VARIANT (va_list_type_node)))
27391 && TYPE_SIZE (type)
27392 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27393 && wi::geu_p (TYPE_SIZE (type), 16)
/* Same per-type refinement ladder as ix86_data_alignment: raise
   alignment for 64-bit and 128-bit element modes (returns elided).  */
27397 if (TREE_CODE (type) == ARRAY_TYPE)
27399 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27401 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27404 else if (TREE_CODE (type) == COMPLEX_TYPE)
27406 if (TYPE_MODE (type) == DCmode && align < 64)
27408 if ((TYPE_MODE (type) == XCmode
27409 || TYPE_MODE (type) == TCmode) && align < 128)
27412 else if ((TREE_CODE (type) == RECORD_TYPE
27413 || TREE_CODE (type) == UNION_TYPE
27414 || TREE_CODE (type) == QUAL_UNION_TYPE)
27415 && TYPE_FIELDS (type))
27417 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27419 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27422 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27423 || TREE_CODE (type) == INTEGER_TYPE)
27426 if (TYPE_MODE (type) == DFmode && align < 64)
27428 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27434 /* Compute the minimum required alignment for dynamic stack realignment
27435 purposes for a local variable, parameter or a stack slot. EXP is
27436 the data type or decl itself, MODE is its mode and ALIGN is the
27437 alignment that the object would ordinarily have. */
27440 ix86_minimum_alignment (tree exp, machine_mode mode,
27441 unsigned int align)
27445 if (exp && DECL_P (exp))
27447 type = TREE_TYPE (exp);
/* Realignment is only in question for 64-bit objects in 32-bit mode with
   a preferred stack boundary below 64 bits.  */
27456 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27459 /* Don't do dynamic stack realignment for long long objects with
27460 -mpreferred-stack-boundary=2. */
27461 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27462 && (!type || !TYPE_USER_ALIGN (type))
27463 && (!decl || !DECL_USER_ALIGN (decl)))
27469 /* Find a location for the static chain incoming to a nested function.
27470 This is a register, unless all free registers are used by arguments. */
27473 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
27477 /* While this function won't be called by the middle-end when a static
27478 chain isn't needed, it's also used throughout the backend so it's
27479 easiest to keep this check centralized. */
27480 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
27485 /* We always use R10 in 64-bit mode. */
27490 const_tree fntype, fndecl;
27493 /* By default in 32-bit mode we use ECX to pass the static chain. */
27496 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
27498 fntype = TREE_TYPE (fndecl_or_type);
27499 fndecl = fndecl_or_type;
27503 fntype = fndecl_or_type;
/* The calling convention may claim ECX for arguments; pick another
   location accordingly.  */
27507 ccvt = ix86_get_callcvt (fntype);
27508 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27510 /* Fastcall functions use ecx/edx for arguments, which leaves
27511 us with EAX for the static chain.
27512 Thiscall functions use ecx for arguments, which also
27513 leaves us with EAX for the static chain. */
27516 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27518 /* Thiscall functions use ecx for arguments, which leaves
27519 us with EAX and EDX for the static chain.
27520 We are using for abi-compatibility EAX. */
27523 else if (ix86_function_regparm (fntype, fndecl) == 3)
27525 /* For regparm 3, we have no free call-clobbered registers in
27526 which to store the static chain. In order to implement this,
27527 we have the trampoline push the static chain to the stack.
27528 However, we can't push a value below the return address when
27529 we call the nested function directly, so we have to use an
27530 alternate entry point. For this we use ESI, and have the
27531 alternate entry point push ESI, so that things appear the
27532 same once we're executing the nested function. */
27535 if (fndecl == current_function_decl)
27536 ix86_static_chain_on_stack = true;
27537 return gen_frame_mem (SImode,
27538 plus_constant (Pmode,
27539 arg_pointer_rtx, -8));
27545 return gen_rtx_REG (Pmode, regno);
27548 /* Emit RTL insns to initialize the variable parts of a trampoline.
27549 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27550 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27551 to be passed to the target function. */
27554 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27560 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27566 /* Load the function address to r11. Try to load address using
27567 the shorter movl instead of movabs. We may want to support
27568 movq for kernel mode, but kernel does not use trampolines at
27569 the moment. FNADDR is a 32bit address and may not be in
27570 DImode when ptr_mode == SImode. Always use movl in this
27572 if (ptr_mode == SImode
27573 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27575 fnaddr = copy_addr_to_reg (fnaddr);
/* 0xbb41 encodes "movl $imm32, %r11d" (REX.B prefix 0x41, opcode 0xbb).  */
27577 mem = adjust_address (m_tramp, HImode, offset);
27578 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27580 mem = adjust_address (m_tramp, SImode, offset + 2);
27581 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
/* 0xbb49 encodes "movabsq $imm64, %r11" (REX.W+B prefix 0x49).  */
27586 mem = adjust_address (m_tramp, HImode, offset);
27587 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27589 mem = adjust_address (m_tramp, DImode, offset + 2);
27590 emit_move_insn (mem, fnaddr);
27594 /* Load static chain using movabs to r10. Use the shorter movl
27595 instead of movabs when ptr_mode == SImode. */
27596 if (ptr_mode == SImode)
27607 mem = adjust_address (m_tramp, HImode, offset);
27608 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27610 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27611 emit_move_insn (mem, chain_value);
27614 /* Jump to r11; the last (unused) byte is a nop, only there to
27615 pad the write out to a single 32-bit store. */
27616 mem = adjust_address (m_tramp, SImode, offset);
27617 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
/* --- 32-bit trampoline below this point. --- */
27624 /* Depending on the static chain location, either load a register
27625 with a constant, or push the constant to the stack. All of the
27626 instructions are the same size. */
27627 chain = ix86_static_chain (fndecl, true);
27630 switch (REGNO (chain))
/* 0xb8/0xb9 are "movl $imm32, %eax" / "movl $imm32, %ecx".  */
27633 opcode = 0xb8; break;
27635 opcode = 0xb9; break;
27637 gcc_unreachable ();
27643 mem = adjust_address (m_tramp, QImode, offset);
27644 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27646 mem = adjust_address (m_tramp, SImode, offset + 1);
27647 emit_move_insn (mem, chain_value);
/* 0xe9 is the rel32 "jmp" opcode.  */
27650 mem = adjust_address (m_tramp, QImode, offset);
27651 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27653 mem = adjust_address (m_tramp, SImode, offset + 1);
27655 /* Compute offset from the end of the jmp to the target function.
27656 In the case in which the trampoline stores the static chain on
27657 the stack, we need to skip the first insn which pushes the
27658 (call-saved) register static chain; this push is 1 byte. */
27660 disp = expand_binop (SImode, sub_optab, fnaddr,
27661 plus_constant (Pmode, XEXP (m_tramp, 0),
27662 offset - (MEM_P (chain) ? 1 : 0)),
27663 NULL_RTX, 1, OPTAB_DIRECT);
27664 emit_move_insn (mem, disp);
27667 gcc_assert (offset <= TRAMPOLINE_SIZE);
27669 #ifdef HAVE_ENABLE_EXECUTE_STACK
27670 #ifdef CHECK_EXECUTE_STACK_ENABLED
27671 if (CHECK_EXECUTE_STACK_ENABLED)
/* On targets with non-executable stacks, ask libgcc to make the
   trampoline's page executable.  */
27673 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27674 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27678 /* The following file contains several enumerations and data structures
27679 built from the definitions in i386-builtin-types.def. */
27681 #include "i386-builtin-types.inc"
27683 /* Table for the ix86 builtin non-function types. */
27684 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27686 /* Retrieve an element from the above table, building some of
27687 the types lazily. */
27690 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27692 unsigned int index;
27695 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
/* Memoized: return the cached tree if this tcode was built before.  */
27697 type = ix86_builtin_type_tab[(int) tcode];
27701 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27702 if (tcode <= IX86_BT_LAST_VECT)
/* Vector type: build it from its element type and machine mode.  */
27706 index = tcode - IX86_BT_LAST_PRIM - 1;
27707 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27708 mode = ix86_builtin_type_vect_mode[index];
27710 type = build_vector_type_for_mode (itype, mode);
/* Pointer type: codes up to IX86_BT_LAST_PTR are plain pointers, the
   rest are pointers to const.  */
27716 index = tcode - IX86_BT_LAST_VECT - 1;
27717 if (tcode <= IX86_BT_LAST_PTR)
27718 quals = TYPE_UNQUALIFIED;
27720 quals = TYPE_QUAL_CONST;
27722 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27723 if (quals != TYPE_UNQUALIFIED)
27724 itype = build_qualified_type (itype, quals);
27726 type = build_pointer_type (itype);
/* Cache the freshly built type for subsequent lookups.  */
27729 ix86_builtin_type_tab[(int) tcode] = type;
27733 /* Table for the ix86 builtin function types. */
27734 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27736 /* Retrieve an element from the above table, building some of
27737 the types lazily. */
27740 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27744 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
/* Memoized: return the cached tree if this tcode was built before.  */
27746 type = ix86_builtin_func_type_tab[(int) tcode];
27750 if (tcode <= IX86_BT_LAST_FUNC)
/* Primary function type: argument codes live in
   ix86_builtin_func_args[start..after), with the return type first.  */
27752 unsigned start = ix86_builtin_func_start[(int) tcode];
27753 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27754 tree rtype, atype, args = void_list_node;
27757 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
/* Walk the argument codes backwards, consing onto the front so the
   final TREE_LIST is in declaration order.  */
27758 for (i = after - 1; i > start; --i)
27760 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27761 args = tree_cons (NULL, atype, args);
27764 type = build_function_type (rtype, args);
/* Alias: share the tree of the function type it aliases.  */
27768 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27769 enum ix86_builtin_func_type icode;
27771 icode = ix86_builtin_func_alias_base[index];
27772 type = ix86_get_builtin_func_type (icode);
27775 ix86_builtin_func_type_tab[(int) tcode] = type;
27780 /* Codes for all the SSE/MMX builtins. */
27783 IX86_BUILTIN_ADDPS,
27784 IX86_BUILTIN_ADDSS,
27785 IX86_BUILTIN_DIVPS,
27786 IX86_BUILTIN_DIVSS,
27787 IX86_BUILTIN_MULPS,
27788 IX86_BUILTIN_MULSS,
27789 IX86_BUILTIN_SUBPS,
27790 IX86_BUILTIN_SUBSS,
27792 IX86_BUILTIN_CMPEQPS,
27793 IX86_BUILTIN_CMPLTPS,
27794 IX86_BUILTIN_CMPLEPS,
27795 IX86_BUILTIN_CMPGTPS,
27796 IX86_BUILTIN_CMPGEPS,
27797 IX86_BUILTIN_CMPNEQPS,
27798 IX86_BUILTIN_CMPNLTPS,
27799 IX86_BUILTIN_CMPNLEPS,
27800 IX86_BUILTIN_CMPNGTPS,
27801 IX86_BUILTIN_CMPNGEPS,
27802 IX86_BUILTIN_CMPORDPS,
27803 IX86_BUILTIN_CMPUNORDPS,
27804 IX86_BUILTIN_CMPEQSS,
27805 IX86_BUILTIN_CMPLTSS,
27806 IX86_BUILTIN_CMPLESS,
27807 IX86_BUILTIN_CMPNEQSS,
27808 IX86_BUILTIN_CMPNLTSS,
27809 IX86_BUILTIN_CMPNLESS,
27810 IX86_BUILTIN_CMPORDSS,
27811 IX86_BUILTIN_CMPUNORDSS,
27813 IX86_BUILTIN_COMIEQSS,
27814 IX86_BUILTIN_COMILTSS,
27815 IX86_BUILTIN_COMILESS,
27816 IX86_BUILTIN_COMIGTSS,
27817 IX86_BUILTIN_COMIGESS,
27818 IX86_BUILTIN_COMINEQSS,
27819 IX86_BUILTIN_UCOMIEQSS,
27820 IX86_BUILTIN_UCOMILTSS,
27821 IX86_BUILTIN_UCOMILESS,
27822 IX86_BUILTIN_UCOMIGTSS,
27823 IX86_BUILTIN_UCOMIGESS,
27824 IX86_BUILTIN_UCOMINEQSS,
27826 IX86_BUILTIN_CVTPI2PS,
27827 IX86_BUILTIN_CVTPS2PI,
27828 IX86_BUILTIN_CVTSI2SS,
27829 IX86_BUILTIN_CVTSI642SS,
27830 IX86_BUILTIN_CVTSS2SI,
27831 IX86_BUILTIN_CVTSS2SI64,
27832 IX86_BUILTIN_CVTTPS2PI,
27833 IX86_BUILTIN_CVTTSS2SI,
27834 IX86_BUILTIN_CVTTSS2SI64,
27836 IX86_BUILTIN_MAXPS,
27837 IX86_BUILTIN_MAXSS,
27838 IX86_BUILTIN_MINPS,
27839 IX86_BUILTIN_MINSS,
27841 IX86_BUILTIN_LOADUPS,
27842 IX86_BUILTIN_STOREUPS,
27843 IX86_BUILTIN_MOVSS,
27845 IX86_BUILTIN_MOVHLPS,
27846 IX86_BUILTIN_MOVLHPS,
27847 IX86_BUILTIN_LOADHPS,
27848 IX86_BUILTIN_LOADLPS,
27849 IX86_BUILTIN_STOREHPS,
27850 IX86_BUILTIN_STORELPS,
27852 IX86_BUILTIN_MASKMOVQ,
27853 IX86_BUILTIN_MOVMSKPS,
27854 IX86_BUILTIN_PMOVMSKB,
27856 IX86_BUILTIN_MOVNTPS,
27857 IX86_BUILTIN_MOVNTQ,
27859 IX86_BUILTIN_LOADDQU,
27860 IX86_BUILTIN_STOREDQU,
27862 IX86_BUILTIN_PACKSSWB,
27863 IX86_BUILTIN_PACKSSDW,
27864 IX86_BUILTIN_PACKUSWB,
27866 IX86_BUILTIN_PADDB,
27867 IX86_BUILTIN_PADDW,
27868 IX86_BUILTIN_PADDD,
27869 IX86_BUILTIN_PADDQ,
27870 IX86_BUILTIN_PADDSB,
27871 IX86_BUILTIN_PADDSW,
27872 IX86_BUILTIN_PADDUSB,
27873 IX86_BUILTIN_PADDUSW,
27874 IX86_BUILTIN_PSUBB,
27875 IX86_BUILTIN_PSUBW,
27876 IX86_BUILTIN_PSUBD,
27877 IX86_BUILTIN_PSUBQ,
27878 IX86_BUILTIN_PSUBSB,
27879 IX86_BUILTIN_PSUBSW,
27880 IX86_BUILTIN_PSUBUSB,
27881 IX86_BUILTIN_PSUBUSW,
27884 IX86_BUILTIN_PANDN,
27888 IX86_BUILTIN_PAVGB,
27889 IX86_BUILTIN_PAVGW,
27891 IX86_BUILTIN_PCMPEQB,
27892 IX86_BUILTIN_PCMPEQW,
27893 IX86_BUILTIN_PCMPEQD,
27894 IX86_BUILTIN_PCMPGTB,
27895 IX86_BUILTIN_PCMPGTW,
27896 IX86_BUILTIN_PCMPGTD,
27898 IX86_BUILTIN_PMADDWD,
27900 IX86_BUILTIN_PMAXSW,
27901 IX86_BUILTIN_PMAXUB,
27902 IX86_BUILTIN_PMINSW,
27903 IX86_BUILTIN_PMINUB,
27905 IX86_BUILTIN_PMULHUW,
27906 IX86_BUILTIN_PMULHW,
27907 IX86_BUILTIN_PMULLW,
27909 IX86_BUILTIN_PSADBW,
27910 IX86_BUILTIN_PSHUFW,
27912 IX86_BUILTIN_PSLLW,
27913 IX86_BUILTIN_PSLLD,
27914 IX86_BUILTIN_PSLLQ,
27915 IX86_BUILTIN_PSRAW,
27916 IX86_BUILTIN_PSRAD,
27917 IX86_BUILTIN_PSRLW,
27918 IX86_BUILTIN_PSRLD,
27919 IX86_BUILTIN_PSRLQ,
27920 IX86_BUILTIN_PSLLWI,
27921 IX86_BUILTIN_PSLLDI,
27922 IX86_BUILTIN_PSLLQI,
27923 IX86_BUILTIN_PSRAWI,
27924 IX86_BUILTIN_PSRADI,
27925 IX86_BUILTIN_PSRLWI,
27926 IX86_BUILTIN_PSRLDI,
27927 IX86_BUILTIN_PSRLQI,
27929 IX86_BUILTIN_PUNPCKHBW,
27930 IX86_BUILTIN_PUNPCKHWD,
27931 IX86_BUILTIN_PUNPCKHDQ,
27932 IX86_BUILTIN_PUNPCKLBW,
27933 IX86_BUILTIN_PUNPCKLWD,
27934 IX86_BUILTIN_PUNPCKLDQ,
27936 IX86_BUILTIN_SHUFPS,
27938 IX86_BUILTIN_RCPPS,
27939 IX86_BUILTIN_RCPSS,
27940 IX86_BUILTIN_RSQRTPS,
27941 IX86_BUILTIN_RSQRTPS_NR,
27942 IX86_BUILTIN_RSQRTSS,
27943 IX86_BUILTIN_RSQRTF,
27944 IX86_BUILTIN_SQRTPS,
27945 IX86_BUILTIN_SQRTPS_NR,
27946 IX86_BUILTIN_SQRTSS,
27948 IX86_BUILTIN_UNPCKHPS,
27949 IX86_BUILTIN_UNPCKLPS,
27951 IX86_BUILTIN_ANDPS,
27952 IX86_BUILTIN_ANDNPS,
27954 IX86_BUILTIN_XORPS,
27957 IX86_BUILTIN_LDMXCSR,
27958 IX86_BUILTIN_STMXCSR,
27959 IX86_BUILTIN_SFENCE,
27961 IX86_BUILTIN_FXSAVE,
27962 IX86_BUILTIN_FXRSTOR,
27963 IX86_BUILTIN_FXSAVE64,
27964 IX86_BUILTIN_FXRSTOR64,
27966 IX86_BUILTIN_XSAVE,
27967 IX86_BUILTIN_XRSTOR,
27968 IX86_BUILTIN_XSAVE64,
27969 IX86_BUILTIN_XRSTOR64,
27971 IX86_BUILTIN_XSAVEOPT,
27972 IX86_BUILTIN_XSAVEOPT64,
27974 IX86_BUILTIN_XSAVEC,
27975 IX86_BUILTIN_XSAVEC64,
27977 IX86_BUILTIN_XSAVES,
27978 IX86_BUILTIN_XRSTORS,
27979 IX86_BUILTIN_XSAVES64,
27980 IX86_BUILTIN_XRSTORS64,
27982 /* 3DNow! Original */
27983 IX86_BUILTIN_FEMMS,
27984 IX86_BUILTIN_PAVGUSB,
27985 IX86_BUILTIN_PF2ID,
27986 IX86_BUILTIN_PFACC,
27987 IX86_BUILTIN_PFADD,
27988 IX86_BUILTIN_PFCMPEQ,
27989 IX86_BUILTIN_PFCMPGE,
27990 IX86_BUILTIN_PFCMPGT,
27991 IX86_BUILTIN_PFMAX,
27992 IX86_BUILTIN_PFMIN,
27993 IX86_BUILTIN_PFMUL,
27994 IX86_BUILTIN_PFRCP,
27995 IX86_BUILTIN_PFRCPIT1,
27996 IX86_BUILTIN_PFRCPIT2,
27997 IX86_BUILTIN_PFRSQIT1,
27998 IX86_BUILTIN_PFRSQRT,
27999 IX86_BUILTIN_PFSUB,
28000 IX86_BUILTIN_PFSUBR,
28001 IX86_BUILTIN_PI2FD,
28002 IX86_BUILTIN_PMULHRW,
28004 /* 3DNow! Athlon Extensions */
28005 IX86_BUILTIN_PF2IW,
28006 IX86_BUILTIN_PFNACC,
28007 IX86_BUILTIN_PFPNACC,
28008 IX86_BUILTIN_PI2FW,
28009 IX86_BUILTIN_PSWAPDSI,
28010 IX86_BUILTIN_PSWAPDSF,
28013 IX86_BUILTIN_ADDPD,
28014 IX86_BUILTIN_ADDSD,
28015 IX86_BUILTIN_DIVPD,
28016 IX86_BUILTIN_DIVSD,
28017 IX86_BUILTIN_MULPD,
28018 IX86_BUILTIN_MULSD,
28019 IX86_BUILTIN_SUBPD,
28020 IX86_BUILTIN_SUBSD,
28022 IX86_BUILTIN_CMPEQPD,
28023 IX86_BUILTIN_CMPLTPD,
28024 IX86_BUILTIN_CMPLEPD,
28025 IX86_BUILTIN_CMPGTPD,
28026 IX86_BUILTIN_CMPGEPD,
28027 IX86_BUILTIN_CMPNEQPD,
28028 IX86_BUILTIN_CMPNLTPD,
28029 IX86_BUILTIN_CMPNLEPD,
28030 IX86_BUILTIN_CMPNGTPD,
28031 IX86_BUILTIN_CMPNGEPD,
28032 IX86_BUILTIN_CMPORDPD,
28033 IX86_BUILTIN_CMPUNORDPD,
28034 IX86_BUILTIN_CMPEQSD,
28035 IX86_BUILTIN_CMPLTSD,
28036 IX86_BUILTIN_CMPLESD,
28037 IX86_BUILTIN_CMPNEQSD,
28038 IX86_BUILTIN_CMPNLTSD,
28039 IX86_BUILTIN_CMPNLESD,
28040 IX86_BUILTIN_CMPORDSD,
28041 IX86_BUILTIN_CMPUNORDSD,
28043 IX86_BUILTIN_COMIEQSD,
28044 IX86_BUILTIN_COMILTSD,
28045 IX86_BUILTIN_COMILESD,
28046 IX86_BUILTIN_COMIGTSD,
28047 IX86_BUILTIN_COMIGESD,
28048 IX86_BUILTIN_COMINEQSD,
28049 IX86_BUILTIN_UCOMIEQSD,
28050 IX86_BUILTIN_UCOMILTSD,
28051 IX86_BUILTIN_UCOMILESD,
28052 IX86_BUILTIN_UCOMIGTSD,
28053 IX86_BUILTIN_UCOMIGESD,
28054 IX86_BUILTIN_UCOMINEQSD,
28056 IX86_BUILTIN_MAXPD,
28057 IX86_BUILTIN_MAXSD,
28058 IX86_BUILTIN_MINPD,
28059 IX86_BUILTIN_MINSD,
28061 IX86_BUILTIN_ANDPD,
28062 IX86_BUILTIN_ANDNPD,
28064 IX86_BUILTIN_XORPD,
28066 IX86_BUILTIN_SQRTPD,
28067 IX86_BUILTIN_SQRTSD,
28069 IX86_BUILTIN_UNPCKHPD,
28070 IX86_BUILTIN_UNPCKLPD,
28072 IX86_BUILTIN_SHUFPD,
28074 IX86_BUILTIN_LOADUPD,
28075 IX86_BUILTIN_STOREUPD,
28076 IX86_BUILTIN_MOVSD,
28078 IX86_BUILTIN_LOADHPD,
28079 IX86_BUILTIN_LOADLPD,
28081 IX86_BUILTIN_CVTDQ2PD,
28082 IX86_BUILTIN_CVTDQ2PS,
28084 IX86_BUILTIN_CVTPD2DQ,
28085 IX86_BUILTIN_CVTPD2PI,
28086 IX86_BUILTIN_CVTPD2PS,
28087 IX86_BUILTIN_CVTTPD2DQ,
28088 IX86_BUILTIN_CVTTPD2PI,
28090 IX86_BUILTIN_CVTPI2PD,
28091 IX86_BUILTIN_CVTSI2SD,
28092 IX86_BUILTIN_CVTSI642SD,
28094 IX86_BUILTIN_CVTSD2SI,
28095 IX86_BUILTIN_CVTSD2SI64,
28096 IX86_BUILTIN_CVTSD2SS,
28097 IX86_BUILTIN_CVTSS2SD,
28098 IX86_BUILTIN_CVTTSD2SI,
28099 IX86_BUILTIN_CVTTSD2SI64,
28101 IX86_BUILTIN_CVTPS2DQ,
28102 IX86_BUILTIN_CVTPS2PD,
28103 IX86_BUILTIN_CVTTPS2DQ,
28105 IX86_BUILTIN_MOVNTI,
28106 IX86_BUILTIN_MOVNTI64,
28107 IX86_BUILTIN_MOVNTPD,
28108 IX86_BUILTIN_MOVNTDQ,
28110 IX86_BUILTIN_MOVQ128,
28113 IX86_BUILTIN_MASKMOVDQU,
28114 IX86_BUILTIN_MOVMSKPD,
28115 IX86_BUILTIN_PMOVMSKB128,
28117 IX86_BUILTIN_PACKSSWB128,
28118 IX86_BUILTIN_PACKSSDW128,
28119 IX86_BUILTIN_PACKUSWB128,
28121 IX86_BUILTIN_PADDB128,
28122 IX86_BUILTIN_PADDW128,
28123 IX86_BUILTIN_PADDD128,
28124 IX86_BUILTIN_PADDQ128,
28125 IX86_BUILTIN_PADDSB128,
28126 IX86_BUILTIN_PADDSW128,
28127 IX86_BUILTIN_PADDUSB128,
28128 IX86_BUILTIN_PADDUSW128,
28129 IX86_BUILTIN_PSUBB128,
28130 IX86_BUILTIN_PSUBW128,
28131 IX86_BUILTIN_PSUBD128,
28132 IX86_BUILTIN_PSUBQ128,
28133 IX86_BUILTIN_PSUBSB128,
28134 IX86_BUILTIN_PSUBSW128,
28135 IX86_BUILTIN_PSUBUSB128,
28136 IX86_BUILTIN_PSUBUSW128,
28138 IX86_BUILTIN_PAND128,
28139 IX86_BUILTIN_PANDN128,
28140 IX86_BUILTIN_POR128,
28141 IX86_BUILTIN_PXOR128,
28143 IX86_BUILTIN_PAVGB128,
28144 IX86_BUILTIN_PAVGW128,
28146 IX86_BUILTIN_PCMPEQB128,
28147 IX86_BUILTIN_PCMPEQW128,
28148 IX86_BUILTIN_PCMPEQD128,
28149 IX86_BUILTIN_PCMPGTB128,
28150 IX86_BUILTIN_PCMPGTW128,
28151 IX86_BUILTIN_PCMPGTD128,
28153 IX86_BUILTIN_PMADDWD128,
28155 IX86_BUILTIN_PMAXSW128,
28156 IX86_BUILTIN_PMAXUB128,
28157 IX86_BUILTIN_PMINSW128,
28158 IX86_BUILTIN_PMINUB128,
28160 IX86_BUILTIN_PMULUDQ,
28161 IX86_BUILTIN_PMULUDQ128,
28162 IX86_BUILTIN_PMULHUW128,
28163 IX86_BUILTIN_PMULHW128,
28164 IX86_BUILTIN_PMULLW128,
28166 IX86_BUILTIN_PSADBW128,
28167 IX86_BUILTIN_PSHUFHW,
28168 IX86_BUILTIN_PSHUFLW,
28169 IX86_BUILTIN_PSHUFD,
28171 IX86_BUILTIN_PSLLDQI128,
28172 IX86_BUILTIN_PSLLWI128,
28173 IX86_BUILTIN_PSLLDI128,
28174 IX86_BUILTIN_PSLLQI128,
28175 IX86_BUILTIN_PSRAWI128,
28176 IX86_BUILTIN_PSRADI128,
28177 IX86_BUILTIN_PSRLDQI128,
28178 IX86_BUILTIN_PSRLWI128,
28179 IX86_BUILTIN_PSRLDI128,
28180 IX86_BUILTIN_PSRLQI128,
28182 IX86_BUILTIN_PSLLDQ128,
28183 IX86_BUILTIN_PSLLW128,
28184 IX86_BUILTIN_PSLLD128,
28185 IX86_BUILTIN_PSLLQ128,
28186 IX86_BUILTIN_PSRAW128,
28187 IX86_BUILTIN_PSRAD128,
28188 IX86_BUILTIN_PSRLW128,
28189 IX86_BUILTIN_PSRLD128,
28190 IX86_BUILTIN_PSRLQ128,
28192 IX86_BUILTIN_PUNPCKHBW128,
28193 IX86_BUILTIN_PUNPCKHWD128,
28194 IX86_BUILTIN_PUNPCKHDQ128,
28195 IX86_BUILTIN_PUNPCKHQDQ128,
28196 IX86_BUILTIN_PUNPCKLBW128,
28197 IX86_BUILTIN_PUNPCKLWD128,
28198 IX86_BUILTIN_PUNPCKLDQ128,
28199 IX86_BUILTIN_PUNPCKLQDQ128,
28201 IX86_BUILTIN_CLFLUSH,
28202 IX86_BUILTIN_MFENCE,
28203 IX86_BUILTIN_LFENCE,
28204 IX86_BUILTIN_PAUSE,
28206 IX86_BUILTIN_FNSTENV,
28207 IX86_BUILTIN_FLDENV,
28208 IX86_BUILTIN_FNSTSW,
28209 IX86_BUILTIN_FNCLEX,
28211 IX86_BUILTIN_BSRSI,
28212 IX86_BUILTIN_BSRDI,
28213 IX86_BUILTIN_RDPMC,
28214 IX86_BUILTIN_RDTSC,
28215 IX86_BUILTIN_RDTSCP,
28216 IX86_BUILTIN_ROLQI,
28217 IX86_BUILTIN_ROLHI,
28218 IX86_BUILTIN_RORQI,
28219 IX86_BUILTIN_RORHI,
28222 IX86_BUILTIN_ADDSUBPS,
28223 IX86_BUILTIN_HADDPS,
28224 IX86_BUILTIN_HSUBPS,
28225 IX86_BUILTIN_MOVSHDUP,
28226 IX86_BUILTIN_MOVSLDUP,
28227 IX86_BUILTIN_ADDSUBPD,
28228 IX86_BUILTIN_HADDPD,
28229 IX86_BUILTIN_HSUBPD,
28230 IX86_BUILTIN_LDDQU,
28232 IX86_BUILTIN_MONITOR,
28233 IX86_BUILTIN_MWAIT,
28236 IX86_BUILTIN_PHADDW,
28237 IX86_BUILTIN_PHADDD,
28238 IX86_BUILTIN_PHADDSW,
28239 IX86_BUILTIN_PHSUBW,
28240 IX86_BUILTIN_PHSUBD,
28241 IX86_BUILTIN_PHSUBSW,
28242 IX86_BUILTIN_PMADDUBSW,
28243 IX86_BUILTIN_PMULHRSW,
28244 IX86_BUILTIN_PSHUFB,
28245 IX86_BUILTIN_PSIGNB,
28246 IX86_BUILTIN_PSIGNW,
28247 IX86_BUILTIN_PSIGND,
28248 IX86_BUILTIN_PALIGNR,
28249 IX86_BUILTIN_PABSB,
28250 IX86_BUILTIN_PABSW,
28251 IX86_BUILTIN_PABSD,
28253 IX86_BUILTIN_PHADDW128,
28254 IX86_BUILTIN_PHADDD128,
28255 IX86_BUILTIN_PHADDSW128,
28256 IX86_BUILTIN_PHSUBW128,
28257 IX86_BUILTIN_PHSUBD128,
28258 IX86_BUILTIN_PHSUBSW128,
28259 IX86_BUILTIN_PMADDUBSW128,
28260 IX86_BUILTIN_PMULHRSW128,
28261 IX86_BUILTIN_PSHUFB128,
28262 IX86_BUILTIN_PSIGNB128,
28263 IX86_BUILTIN_PSIGNW128,
28264 IX86_BUILTIN_PSIGND128,
28265 IX86_BUILTIN_PALIGNR128,
28266 IX86_BUILTIN_PABSB128,
28267 IX86_BUILTIN_PABSW128,
28268 IX86_BUILTIN_PABSD128,
28270 /* AMDFAM10 - SSE4A New Instructions. */
28271 IX86_BUILTIN_MOVNTSD,
28272 IX86_BUILTIN_MOVNTSS,
28273 IX86_BUILTIN_EXTRQI,
28274 IX86_BUILTIN_EXTRQ,
28275 IX86_BUILTIN_INSERTQI,
28276 IX86_BUILTIN_INSERTQ,
28279 IX86_BUILTIN_BLENDPD,
28280 IX86_BUILTIN_BLENDPS,
28281 IX86_BUILTIN_BLENDVPD,
28282 IX86_BUILTIN_BLENDVPS,
28283 IX86_BUILTIN_PBLENDVB128,
28284 IX86_BUILTIN_PBLENDW128,
28289 IX86_BUILTIN_INSERTPS128,
28291 IX86_BUILTIN_MOVNTDQA,
28292 IX86_BUILTIN_MPSADBW128,
28293 IX86_BUILTIN_PACKUSDW128,
28294 IX86_BUILTIN_PCMPEQQ,
28295 IX86_BUILTIN_PHMINPOSUW128,
28297 IX86_BUILTIN_PMAXSB128,
28298 IX86_BUILTIN_PMAXSD128,
28299 IX86_BUILTIN_PMAXUD128,
28300 IX86_BUILTIN_PMAXUW128,
28302 IX86_BUILTIN_PMINSB128,
28303 IX86_BUILTIN_PMINSD128,
28304 IX86_BUILTIN_PMINUD128,
28305 IX86_BUILTIN_PMINUW128,
28307 IX86_BUILTIN_PMOVSXBW128,
28308 IX86_BUILTIN_PMOVSXBD128,
28309 IX86_BUILTIN_PMOVSXBQ128,
28310 IX86_BUILTIN_PMOVSXWD128,
28311 IX86_BUILTIN_PMOVSXWQ128,
28312 IX86_BUILTIN_PMOVSXDQ128,
28314 IX86_BUILTIN_PMOVZXBW128,
28315 IX86_BUILTIN_PMOVZXBD128,
28316 IX86_BUILTIN_PMOVZXBQ128,
28317 IX86_BUILTIN_PMOVZXWD128,
28318 IX86_BUILTIN_PMOVZXWQ128,
28319 IX86_BUILTIN_PMOVZXDQ128,
28321 IX86_BUILTIN_PMULDQ128,
28322 IX86_BUILTIN_PMULLD128,
28324 IX86_BUILTIN_ROUNDSD,
28325 IX86_BUILTIN_ROUNDSS,
28327 IX86_BUILTIN_ROUNDPD,
28328 IX86_BUILTIN_ROUNDPS,
28330 IX86_BUILTIN_FLOORPD,
28331 IX86_BUILTIN_CEILPD,
28332 IX86_BUILTIN_TRUNCPD,
28333 IX86_BUILTIN_RINTPD,
28334 IX86_BUILTIN_ROUNDPD_AZ,
28336 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28337 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28338 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28340 IX86_BUILTIN_FLOORPS,
28341 IX86_BUILTIN_CEILPS,
28342 IX86_BUILTIN_TRUNCPS,
28343 IX86_BUILTIN_RINTPS,
28344 IX86_BUILTIN_ROUNDPS_AZ,
28346 IX86_BUILTIN_FLOORPS_SFIX,
28347 IX86_BUILTIN_CEILPS_SFIX,
28348 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28350 IX86_BUILTIN_PTESTZ,
28351 IX86_BUILTIN_PTESTC,
28352 IX86_BUILTIN_PTESTNZC,
28354 IX86_BUILTIN_VEC_INIT_V2SI,
28355 IX86_BUILTIN_VEC_INIT_V4HI,
28356 IX86_BUILTIN_VEC_INIT_V8QI,
28357 IX86_BUILTIN_VEC_EXT_V2DF,
28358 IX86_BUILTIN_VEC_EXT_V2DI,
28359 IX86_BUILTIN_VEC_EXT_V4SF,
28360 IX86_BUILTIN_VEC_EXT_V4SI,
28361 IX86_BUILTIN_VEC_EXT_V8HI,
28362 IX86_BUILTIN_VEC_EXT_V2SI,
28363 IX86_BUILTIN_VEC_EXT_V4HI,
28364 IX86_BUILTIN_VEC_EXT_V16QI,
28365 IX86_BUILTIN_VEC_SET_V2DI,
28366 IX86_BUILTIN_VEC_SET_V4SF,
28367 IX86_BUILTIN_VEC_SET_V4SI,
28368 IX86_BUILTIN_VEC_SET_V8HI,
28369 IX86_BUILTIN_VEC_SET_V4HI,
28370 IX86_BUILTIN_VEC_SET_V16QI,
28372 IX86_BUILTIN_VEC_PACK_SFIX,
28373 IX86_BUILTIN_VEC_PACK_SFIX256,
28376 IX86_BUILTIN_CRC32QI,
28377 IX86_BUILTIN_CRC32HI,
28378 IX86_BUILTIN_CRC32SI,
28379 IX86_BUILTIN_CRC32DI,
28381 IX86_BUILTIN_PCMPESTRI128,
28382 IX86_BUILTIN_PCMPESTRM128,
28383 IX86_BUILTIN_PCMPESTRA128,
28384 IX86_BUILTIN_PCMPESTRC128,
28385 IX86_BUILTIN_PCMPESTRO128,
28386 IX86_BUILTIN_PCMPESTRS128,
28387 IX86_BUILTIN_PCMPESTRZ128,
28388 IX86_BUILTIN_PCMPISTRI128,
28389 IX86_BUILTIN_PCMPISTRM128,
28390 IX86_BUILTIN_PCMPISTRA128,
28391 IX86_BUILTIN_PCMPISTRC128,
28392 IX86_BUILTIN_PCMPISTRO128,
28393 IX86_BUILTIN_PCMPISTRS128,
28394 IX86_BUILTIN_PCMPISTRZ128,
28396 IX86_BUILTIN_PCMPGTQ,
28398 /* AES instructions */
28399 IX86_BUILTIN_AESENC128,
28400 IX86_BUILTIN_AESENCLAST128,
28401 IX86_BUILTIN_AESDEC128,
28402 IX86_BUILTIN_AESDECLAST128,
28403 IX86_BUILTIN_AESIMC128,
28404 IX86_BUILTIN_AESKEYGENASSIST128,
28406 /* PCLMUL instruction */
28407 IX86_BUILTIN_PCLMULQDQ128,
28410 IX86_BUILTIN_ADDPD256,
28411 IX86_BUILTIN_ADDPS256,
28412 IX86_BUILTIN_ADDSUBPD256,
28413 IX86_BUILTIN_ADDSUBPS256,
28414 IX86_BUILTIN_ANDPD256,
28415 IX86_BUILTIN_ANDPS256,
28416 IX86_BUILTIN_ANDNPD256,
28417 IX86_BUILTIN_ANDNPS256,
28418 IX86_BUILTIN_BLENDPD256,
28419 IX86_BUILTIN_BLENDPS256,
28420 IX86_BUILTIN_BLENDVPD256,
28421 IX86_BUILTIN_BLENDVPS256,
28422 IX86_BUILTIN_DIVPD256,
28423 IX86_BUILTIN_DIVPS256,
28424 IX86_BUILTIN_DPPS256,
28425 IX86_BUILTIN_HADDPD256,
28426 IX86_BUILTIN_HADDPS256,
28427 IX86_BUILTIN_HSUBPD256,
28428 IX86_BUILTIN_HSUBPS256,
28429 IX86_BUILTIN_MAXPD256,
28430 IX86_BUILTIN_MAXPS256,
28431 IX86_BUILTIN_MINPD256,
28432 IX86_BUILTIN_MINPS256,
28433 IX86_BUILTIN_MULPD256,
28434 IX86_BUILTIN_MULPS256,
28435 IX86_BUILTIN_ORPD256,
28436 IX86_BUILTIN_ORPS256,
28437 IX86_BUILTIN_SHUFPD256,
28438 IX86_BUILTIN_SHUFPS256,
28439 IX86_BUILTIN_SUBPD256,
28440 IX86_BUILTIN_SUBPS256,
28441 IX86_BUILTIN_XORPD256,
28442 IX86_BUILTIN_XORPS256,
28443 IX86_BUILTIN_CMPSD,
28444 IX86_BUILTIN_CMPSS,
28445 IX86_BUILTIN_CMPPD,
28446 IX86_BUILTIN_CMPPS,
28447 IX86_BUILTIN_CMPPD256,
28448 IX86_BUILTIN_CMPPS256,
28449 IX86_BUILTIN_CVTDQ2PD256,
28450 IX86_BUILTIN_CVTDQ2PS256,
28451 IX86_BUILTIN_CVTPD2PS256,
28452 IX86_BUILTIN_CVTPS2DQ256,
28453 IX86_BUILTIN_CVTPS2PD256,
28454 IX86_BUILTIN_CVTTPD2DQ256,
28455 IX86_BUILTIN_CVTPD2DQ256,
28456 IX86_BUILTIN_CVTTPS2DQ256,
28457 IX86_BUILTIN_EXTRACTF128PD256,
28458 IX86_BUILTIN_EXTRACTF128PS256,
28459 IX86_BUILTIN_EXTRACTF128SI256,
28460 IX86_BUILTIN_VZEROALL,
28461 IX86_BUILTIN_VZEROUPPER,
28462 IX86_BUILTIN_VPERMILVARPD,
28463 IX86_BUILTIN_VPERMILVARPS,
28464 IX86_BUILTIN_VPERMILVARPD256,
28465 IX86_BUILTIN_VPERMILVARPS256,
28466 IX86_BUILTIN_VPERMILPD,
28467 IX86_BUILTIN_VPERMILPS,
28468 IX86_BUILTIN_VPERMILPD256,
28469 IX86_BUILTIN_VPERMILPS256,
28470 IX86_BUILTIN_VPERMIL2PD,
28471 IX86_BUILTIN_VPERMIL2PS,
28472 IX86_BUILTIN_VPERMIL2PD256,
28473 IX86_BUILTIN_VPERMIL2PS256,
28474 IX86_BUILTIN_VPERM2F128PD256,
28475 IX86_BUILTIN_VPERM2F128PS256,
28476 IX86_BUILTIN_VPERM2F128SI256,
28477 IX86_BUILTIN_VBROADCASTSS,
28478 IX86_BUILTIN_VBROADCASTSD256,
28479 IX86_BUILTIN_VBROADCASTSS256,
28480 IX86_BUILTIN_VBROADCASTPD256,
28481 IX86_BUILTIN_VBROADCASTPS256,
28482 IX86_BUILTIN_VINSERTF128PD256,
28483 IX86_BUILTIN_VINSERTF128PS256,
28484 IX86_BUILTIN_VINSERTF128SI256,
28485 IX86_BUILTIN_LOADUPD256,
28486 IX86_BUILTIN_LOADUPS256,
28487 IX86_BUILTIN_STOREUPD256,
28488 IX86_BUILTIN_STOREUPS256,
28489 IX86_BUILTIN_LDDQU256,
28490 IX86_BUILTIN_MOVNTDQ256,
28491 IX86_BUILTIN_MOVNTPD256,
28492 IX86_BUILTIN_MOVNTPS256,
28493 IX86_BUILTIN_LOADDQU256,
28494 IX86_BUILTIN_STOREDQU256,
28495 IX86_BUILTIN_MASKLOADPD,
28496 IX86_BUILTIN_MASKLOADPS,
28497 IX86_BUILTIN_MASKSTOREPD,
28498 IX86_BUILTIN_MASKSTOREPS,
28499 IX86_BUILTIN_MASKLOADPD256,
28500 IX86_BUILTIN_MASKLOADPS256,
28501 IX86_BUILTIN_MASKSTOREPD256,
28502 IX86_BUILTIN_MASKSTOREPS256,
28503 IX86_BUILTIN_MOVSHDUP256,
28504 IX86_BUILTIN_MOVSLDUP256,
28505 IX86_BUILTIN_MOVDDUP256,
28507 IX86_BUILTIN_SQRTPD256,
28508 IX86_BUILTIN_SQRTPS256,
28509 IX86_BUILTIN_SQRTPS_NR256,
28510 IX86_BUILTIN_RSQRTPS256,
28511 IX86_BUILTIN_RSQRTPS_NR256,
28513 IX86_BUILTIN_RCPPS256,
28515 IX86_BUILTIN_ROUNDPD256,
28516 IX86_BUILTIN_ROUNDPS256,
28518 IX86_BUILTIN_FLOORPD256,
28519 IX86_BUILTIN_CEILPD256,
28520 IX86_BUILTIN_TRUNCPD256,
28521 IX86_BUILTIN_RINTPD256,
28522 IX86_BUILTIN_ROUNDPD_AZ256,
28524 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28525 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28526 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28528 IX86_BUILTIN_FLOORPS256,
28529 IX86_BUILTIN_CEILPS256,
28530 IX86_BUILTIN_TRUNCPS256,
28531 IX86_BUILTIN_RINTPS256,
28532 IX86_BUILTIN_ROUNDPS_AZ256,
28534 IX86_BUILTIN_FLOORPS_SFIX256,
28535 IX86_BUILTIN_CEILPS_SFIX256,
28536 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28538 IX86_BUILTIN_UNPCKHPD256,
28539 IX86_BUILTIN_UNPCKLPD256,
28540 IX86_BUILTIN_UNPCKHPS256,
28541 IX86_BUILTIN_UNPCKLPS256,
28543 IX86_BUILTIN_SI256_SI,
28544 IX86_BUILTIN_PS256_PS,
28545 IX86_BUILTIN_PD256_PD,
28546 IX86_BUILTIN_SI_SI256,
28547 IX86_BUILTIN_PS_PS256,
28548 IX86_BUILTIN_PD_PD256,
28550 IX86_BUILTIN_VTESTZPD,
28551 IX86_BUILTIN_VTESTCPD,
28552 IX86_BUILTIN_VTESTNZCPD,
28553 IX86_BUILTIN_VTESTZPS,
28554 IX86_BUILTIN_VTESTCPS,
28555 IX86_BUILTIN_VTESTNZCPS,
28556 IX86_BUILTIN_VTESTZPD256,
28557 IX86_BUILTIN_VTESTCPD256,
28558 IX86_BUILTIN_VTESTNZCPD256,
28559 IX86_BUILTIN_VTESTZPS256,
28560 IX86_BUILTIN_VTESTCPS256,
28561 IX86_BUILTIN_VTESTNZCPS256,
28562 IX86_BUILTIN_PTESTZ256,
28563 IX86_BUILTIN_PTESTC256,
28564 IX86_BUILTIN_PTESTNZC256,
28566 IX86_BUILTIN_MOVMSKPD256,
28567 IX86_BUILTIN_MOVMSKPS256,
28570 IX86_BUILTIN_MPSADBW256,
28571 IX86_BUILTIN_PABSB256,
28572 IX86_BUILTIN_PABSW256,
28573 IX86_BUILTIN_PABSD256,
28574 IX86_BUILTIN_PACKSSDW256,
28575 IX86_BUILTIN_PACKSSWB256,
28576 IX86_BUILTIN_PACKUSDW256,
28577 IX86_BUILTIN_PACKUSWB256,
28578 IX86_BUILTIN_PADDB256,
28579 IX86_BUILTIN_PADDW256,
28580 IX86_BUILTIN_PADDD256,
28581 IX86_BUILTIN_PADDQ256,
28582 IX86_BUILTIN_PADDSB256,
28583 IX86_BUILTIN_PADDSW256,
28584 IX86_BUILTIN_PADDUSB256,
28585 IX86_BUILTIN_PADDUSW256,
28586 IX86_BUILTIN_PALIGNR256,
28587 IX86_BUILTIN_AND256I,
28588 IX86_BUILTIN_ANDNOT256I,
28589 IX86_BUILTIN_PAVGB256,
28590 IX86_BUILTIN_PAVGW256,
28591 IX86_BUILTIN_PBLENDVB256,
28592 IX86_BUILTIN_PBLENDVW256,
28593 IX86_BUILTIN_PCMPEQB256,
28594 IX86_BUILTIN_PCMPEQW256,
28595 IX86_BUILTIN_PCMPEQD256,
28596 IX86_BUILTIN_PCMPEQQ256,
28597 IX86_BUILTIN_PCMPGTB256,
28598 IX86_BUILTIN_PCMPGTW256,
28599 IX86_BUILTIN_PCMPGTD256,
28600 IX86_BUILTIN_PCMPGTQ256,
28601 IX86_BUILTIN_PHADDW256,
28602 IX86_BUILTIN_PHADDD256,
28603 IX86_BUILTIN_PHADDSW256,
28604 IX86_BUILTIN_PHSUBW256,
28605 IX86_BUILTIN_PHSUBD256,
28606 IX86_BUILTIN_PHSUBSW256,
28607 IX86_BUILTIN_PMADDUBSW256,
28608 IX86_BUILTIN_PMADDWD256,
28609 IX86_BUILTIN_PMAXSB256,
28610 IX86_BUILTIN_PMAXSW256,
28611 IX86_BUILTIN_PMAXSD256,
28612 IX86_BUILTIN_PMAXUB256,
28613 IX86_BUILTIN_PMAXUW256,
28614 IX86_BUILTIN_PMAXUD256,
28615 IX86_BUILTIN_PMINSB256,
28616 IX86_BUILTIN_PMINSW256,
28617 IX86_BUILTIN_PMINSD256,
28618 IX86_BUILTIN_PMINUB256,
28619 IX86_BUILTIN_PMINUW256,
28620 IX86_BUILTIN_PMINUD256,
28621 IX86_BUILTIN_PMOVMSKB256,
28622 IX86_BUILTIN_PMOVSXBW256,
28623 IX86_BUILTIN_PMOVSXBD256,
28624 IX86_BUILTIN_PMOVSXBQ256,
28625 IX86_BUILTIN_PMOVSXWD256,
28626 IX86_BUILTIN_PMOVSXWQ256,
28627 IX86_BUILTIN_PMOVSXDQ256,
28628 IX86_BUILTIN_PMOVZXBW256,
28629 IX86_BUILTIN_PMOVZXBD256,
28630 IX86_BUILTIN_PMOVZXBQ256,
28631 IX86_BUILTIN_PMOVZXWD256,
28632 IX86_BUILTIN_PMOVZXWQ256,
28633 IX86_BUILTIN_PMOVZXDQ256,
28634 IX86_BUILTIN_PMULDQ256,
28635 IX86_BUILTIN_PMULHRSW256,
28636 IX86_BUILTIN_PMULHUW256,
28637 IX86_BUILTIN_PMULHW256,
28638 IX86_BUILTIN_PMULLW256,
28639 IX86_BUILTIN_PMULLD256,
28640 IX86_BUILTIN_PMULUDQ256,
28641 IX86_BUILTIN_POR256,
28642 IX86_BUILTIN_PSADBW256,
28643 IX86_BUILTIN_PSHUFB256,
28644 IX86_BUILTIN_PSHUFD256,
28645 IX86_BUILTIN_PSHUFHW256,
28646 IX86_BUILTIN_PSHUFLW256,
28647 IX86_BUILTIN_PSIGNB256,
28648 IX86_BUILTIN_PSIGNW256,
28649 IX86_BUILTIN_PSIGND256,
28650 IX86_BUILTIN_PSLLDQI256,
28651 IX86_BUILTIN_PSLLWI256,
28652 IX86_BUILTIN_PSLLW256,
28653 IX86_BUILTIN_PSLLDI256,
28654 IX86_BUILTIN_PSLLD256,
28655 IX86_BUILTIN_PSLLQI256,
28656 IX86_BUILTIN_PSLLQ256,
28657 IX86_BUILTIN_PSRAWI256,
28658 IX86_BUILTIN_PSRAW256,
28659 IX86_BUILTIN_PSRADI256,
28660 IX86_BUILTIN_PSRAD256,
28661 IX86_BUILTIN_PSRLDQI256,
28662 IX86_BUILTIN_PSRLWI256,
28663 IX86_BUILTIN_PSRLW256,
28664 IX86_BUILTIN_PSRLDI256,
28665 IX86_BUILTIN_PSRLD256,
28666 IX86_BUILTIN_PSRLQI256,
28667 IX86_BUILTIN_PSRLQ256,
28668 IX86_BUILTIN_PSUBB256,
28669 IX86_BUILTIN_PSUBW256,
28670 IX86_BUILTIN_PSUBD256,
28671 IX86_BUILTIN_PSUBQ256,
28672 IX86_BUILTIN_PSUBSB256,
28673 IX86_BUILTIN_PSUBSW256,
28674 IX86_BUILTIN_PSUBUSB256,
28675 IX86_BUILTIN_PSUBUSW256,
28676 IX86_BUILTIN_PUNPCKHBW256,
28677 IX86_BUILTIN_PUNPCKHWD256,
28678 IX86_BUILTIN_PUNPCKHDQ256,
28679 IX86_BUILTIN_PUNPCKHQDQ256,
28680 IX86_BUILTIN_PUNPCKLBW256,
28681 IX86_BUILTIN_PUNPCKLWD256,
28682 IX86_BUILTIN_PUNPCKLDQ256,
28683 IX86_BUILTIN_PUNPCKLQDQ256,
28684 IX86_BUILTIN_PXOR256,
28685 IX86_BUILTIN_MOVNTDQA256,
28686 IX86_BUILTIN_VBROADCASTSS_PS,
28687 IX86_BUILTIN_VBROADCASTSS_PS256,
28688 IX86_BUILTIN_VBROADCASTSD_PD256,
28689 IX86_BUILTIN_VBROADCASTSI256,
28690 IX86_BUILTIN_PBLENDD256,
28691 IX86_BUILTIN_PBLENDD128,
28692 IX86_BUILTIN_PBROADCASTB256,
28693 IX86_BUILTIN_PBROADCASTW256,
28694 IX86_BUILTIN_PBROADCASTD256,
28695 IX86_BUILTIN_PBROADCASTQ256,
28696 IX86_BUILTIN_PBROADCASTB128,
28697 IX86_BUILTIN_PBROADCASTW128,
28698 IX86_BUILTIN_PBROADCASTD128,
28699 IX86_BUILTIN_PBROADCASTQ128,
28700 IX86_BUILTIN_VPERMVARSI256,
28701 IX86_BUILTIN_VPERMDF256,
28702 IX86_BUILTIN_VPERMVARSF256,
28703 IX86_BUILTIN_VPERMDI256,
28704 IX86_BUILTIN_VPERMTI256,
28705 IX86_BUILTIN_VEXTRACT128I256,
28706 IX86_BUILTIN_VINSERT128I256,
28707 IX86_BUILTIN_MASKLOADD,
28708 IX86_BUILTIN_MASKLOADQ,
28709 IX86_BUILTIN_MASKLOADD256,
28710 IX86_BUILTIN_MASKLOADQ256,
28711 IX86_BUILTIN_MASKSTORED,
28712 IX86_BUILTIN_MASKSTOREQ,
28713 IX86_BUILTIN_MASKSTORED256,
28714 IX86_BUILTIN_MASKSTOREQ256,
28715 IX86_BUILTIN_PSLLVV4DI,
28716 IX86_BUILTIN_PSLLVV2DI,
28717 IX86_BUILTIN_PSLLVV8SI,
28718 IX86_BUILTIN_PSLLVV4SI,
28719 IX86_BUILTIN_PSRAVV8SI,
28720 IX86_BUILTIN_PSRAVV4SI,
28721 IX86_BUILTIN_PSRLVV4DI,
28722 IX86_BUILTIN_PSRLVV2DI,
28723 IX86_BUILTIN_PSRLVV8SI,
28724 IX86_BUILTIN_PSRLVV4SI,
28726 IX86_BUILTIN_GATHERSIV2DF,
28727 IX86_BUILTIN_GATHERSIV4DF,
28728 IX86_BUILTIN_GATHERDIV2DF,
28729 IX86_BUILTIN_GATHERDIV4DF,
28730 IX86_BUILTIN_GATHERSIV4SF,
28731 IX86_BUILTIN_GATHERSIV8SF,
28732 IX86_BUILTIN_GATHERDIV4SF,
28733 IX86_BUILTIN_GATHERDIV8SF,
28734 IX86_BUILTIN_GATHERSIV2DI,
28735 IX86_BUILTIN_GATHERSIV4DI,
28736 IX86_BUILTIN_GATHERDIV2DI,
28737 IX86_BUILTIN_GATHERDIV4DI,
28738 IX86_BUILTIN_GATHERSIV4SI,
28739 IX86_BUILTIN_GATHERSIV8SI,
28740 IX86_BUILTIN_GATHERDIV4SI,
28741 IX86_BUILTIN_GATHERDIV8SI,
28744 IX86_BUILTIN_SI512_SI256,
28745 IX86_BUILTIN_PD512_PD256,
28746 IX86_BUILTIN_PS512_PS256,
28747 IX86_BUILTIN_SI512_SI,
28748 IX86_BUILTIN_PD512_PD,
28749 IX86_BUILTIN_PS512_PS,
28750 IX86_BUILTIN_ADDPD512,
28751 IX86_BUILTIN_ADDPS512,
28752 IX86_BUILTIN_ADDSD_ROUND,
28753 IX86_BUILTIN_ADDSS_ROUND,
28754 IX86_BUILTIN_ALIGND512,
28755 IX86_BUILTIN_ALIGNQ512,
28756 IX86_BUILTIN_BLENDMD512,
28757 IX86_BUILTIN_BLENDMPD512,
28758 IX86_BUILTIN_BLENDMPS512,
28759 IX86_BUILTIN_BLENDMQ512,
28760 IX86_BUILTIN_BROADCASTF32X4_512,
28761 IX86_BUILTIN_BROADCASTF64X4_512,
28762 IX86_BUILTIN_BROADCASTI32X4_512,
28763 IX86_BUILTIN_BROADCASTI64X4_512,
28764 IX86_BUILTIN_BROADCASTSD512,
28765 IX86_BUILTIN_BROADCASTSS512,
28766 IX86_BUILTIN_CMPD512,
28767 IX86_BUILTIN_CMPPD512,
28768 IX86_BUILTIN_CMPPS512,
28769 IX86_BUILTIN_CMPQ512,
28770 IX86_BUILTIN_CMPSD_MASK,
28771 IX86_BUILTIN_CMPSS_MASK,
28772 IX86_BUILTIN_COMIDF,
28773 IX86_BUILTIN_COMISF,
28774 IX86_BUILTIN_COMPRESSPD512,
28775 IX86_BUILTIN_COMPRESSPDSTORE512,
28776 IX86_BUILTIN_COMPRESSPS512,
28777 IX86_BUILTIN_COMPRESSPSSTORE512,
28778 IX86_BUILTIN_CVTDQ2PD512,
28779 IX86_BUILTIN_CVTDQ2PS512,
28780 IX86_BUILTIN_CVTPD2DQ512,
28781 IX86_BUILTIN_CVTPD2PS512,
28782 IX86_BUILTIN_CVTPD2UDQ512,
28783 IX86_BUILTIN_CVTPH2PS512,
28784 IX86_BUILTIN_CVTPS2DQ512,
28785 IX86_BUILTIN_CVTPS2PD512,
28786 IX86_BUILTIN_CVTPS2PH512,
28787 IX86_BUILTIN_CVTPS2UDQ512,
28788 IX86_BUILTIN_CVTSD2SS_ROUND,
28789 IX86_BUILTIN_CVTSI2SD64,
28790 IX86_BUILTIN_CVTSI2SS32,
28791 IX86_BUILTIN_CVTSI2SS64,
28792 IX86_BUILTIN_CVTSS2SD_ROUND,
28793 IX86_BUILTIN_CVTTPD2DQ512,
28794 IX86_BUILTIN_CVTTPD2UDQ512,
28795 IX86_BUILTIN_CVTTPS2DQ512,
28796 IX86_BUILTIN_CVTTPS2UDQ512,
28797 IX86_BUILTIN_CVTUDQ2PD512,
28798 IX86_BUILTIN_CVTUDQ2PS512,
28799 IX86_BUILTIN_CVTUSI2SD32,
28800 IX86_BUILTIN_CVTUSI2SD64,
28801 IX86_BUILTIN_CVTUSI2SS32,
28802 IX86_BUILTIN_CVTUSI2SS64,
28803 IX86_BUILTIN_DIVPD512,
28804 IX86_BUILTIN_DIVPS512,
28805 IX86_BUILTIN_DIVSD_ROUND,
28806 IX86_BUILTIN_DIVSS_ROUND,
28807 IX86_BUILTIN_EXPANDPD512,
28808 IX86_BUILTIN_EXPANDPD512Z,
28809 IX86_BUILTIN_EXPANDPDLOAD512,
28810 IX86_BUILTIN_EXPANDPDLOAD512Z,
28811 IX86_BUILTIN_EXPANDPS512,
28812 IX86_BUILTIN_EXPANDPS512Z,
28813 IX86_BUILTIN_EXPANDPSLOAD512,
28814 IX86_BUILTIN_EXPANDPSLOAD512Z,
28815 IX86_BUILTIN_EXTRACTF32X4,
28816 IX86_BUILTIN_EXTRACTF64X4,
28817 IX86_BUILTIN_EXTRACTI32X4,
28818 IX86_BUILTIN_EXTRACTI64X4,
28819 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28820 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28821 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28822 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28823 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28824 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28825 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28826 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28827 IX86_BUILTIN_GETEXPPD512,
28828 IX86_BUILTIN_GETEXPPS512,
28829 IX86_BUILTIN_GETEXPSD128,
28830 IX86_BUILTIN_GETEXPSS128,
28831 IX86_BUILTIN_GETMANTPD512,
28832 IX86_BUILTIN_GETMANTPS512,
28833 IX86_BUILTIN_GETMANTSD128,
28834 IX86_BUILTIN_GETMANTSS128,
28835 IX86_BUILTIN_INSERTF32X4,
28836 IX86_BUILTIN_INSERTF64X4,
28837 IX86_BUILTIN_INSERTI32X4,
28838 IX86_BUILTIN_INSERTI64X4,
28839 IX86_BUILTIN_LOADAPD512,
28840 IX86_BUILTIN_LOADAPS512,
28841 IX86_BUILTIN_LOADDQUDI512,
28842 IX86_BUILTIN_LOADDQUSI512,
28843 IX86_BUILTIN_LOADUPD512,
28844 IX86_BUILTIN_LOADUPS512,
28845 IX86_BUILTIN_MAXPD512,
28846 IX86_BUILTIN_MAXPS512,
28847 IX86_BUILTIN_MAXSD_ROUND,
28848 IX86_BUILTIN_MAXSS_ROUND,
28849 IX86_BUILTIN_MINPD512,
28850 IX86_BUILTIN_MINPS512,
28851 IX86_BUILTIN_MINSD_ROUND,
28852 IX86_BUILTIN_MINSS_ROUND,
28853 IX86_BUILTIN_MOVAPD512,
28854 IX86_BUILTIN_MOVAPS512,
28855 IX86_BUILTIN_MOVDDUP512,
28856 IX86_BUILTIN_MOVDQA32LOAD512,
28857 IX86_BUILTIN_MOVDQA32STORE512,
28858 IX86_BUILTIN_MOVDQA32_512,
28859 IX86_BUILTIN_MOVDQA64LOAD512,
28860 IX86_BUILTIN_MOVDQA64STORE512,
28861 IX86_BUILTIN_MOVDQA64_512,
28862 IX86_BUILTIN_MOVNTDQ512,
28863 IX86_BUILTIN_MOVNTDQA512,
28864 IX86_BUILTIN_MOVNTPD512,
28865 IX86_BUILTIN_MOVNTPS512,
28866 IX86_BUILTIN_MOVSHDUP512,
28867 IX86_BUILTIN_MOVSLDUP512,
28868 IX86_BUILTIN_MULPD512,
28869 IX86_BUILTIN_MULPS512,
28870 IX86_BUILTIN_MULSD_ROUND,
28871 IX86_BUILTIN_MULSS_ROUND,
28872 IX86_BUILTIN_PABSD512,
28873 IX86_BUILTIN_PABSQ512,
28874 IX86_BUILTIN_PADDD512,
28875 IX86_BUILTIN_PADDQ512,
28876 IX86_BUILTIN_PANDD512,
28877 IX86_BUILTIN_PANDND512,
28878 IX86_BUILTIN_PANDNQ512,
28879 IX86_BUILTIN_PANDQ512,
28880 IX86_BUILTIN_PBROADCASTD512,
28881 IX86_BUILTIN_PBROADCASTD512_GPR,
28882 IX86_BUILTIN_PBROADCASTMB512,
28883 IX86_BUILTIN_PBROADCASTMW512,
28884 IX86_BUILTIN_PBROADCASTQ512,
28885 IX86_BUILTIN_PBROADCASTQ512_GPR,
28886 IX86_BUILTIN_PCMPEQD512_MASK,
28887 IX86_BUILTIN_PCMPEQQ512_MASK,
28888 IX86_BUILTIN_PCMPGTD512_MASK,
28889 IX86_BUILTIN_PCMPGTQ512_MASK,
28890 IX86_BUILTIN_PCOMPRESSD512,
28891 IX86_BUILTIN_PCOMPRESSDSTORE512,
28892 IX86_BUILTIN_PCOMPRESSQ512,
28893 IX86_BUILTIN_PCOMPRESSQSTORE512,
28894 IX86_BUILTIN_PEXPANDD512,
28895 IX86_BUILTIN_PEXPANDD512Z,
28896 IX86_BUILTIN_PEXPANDDLOAD512,
28897 IX86_BUILTIN_PEXPANDDLOAD512Z,
28898 IX86_BUILTIN_PEXPANDQ512,
28899 IX86_BUILTIN_PEXPANDQ512Z,
28900 IX86_BUILTIN_PEXPANDQLOAD512,
28901 IX86_BUILTIN_PEXPANDQLOAD512Z,
28902 IX86_BUILTIN_PMAXSD512,
28903 IX86_BUILTIN_PMAXSQ512,
28904 IX86_BUILTIN_PMAXUD512,
28905 IX86_BUILTIN_PMAXUQ512,
28906 IX86_BUILTIN_PMINSD512,
28907 IX86_BUILTIN_PMINSQ512,
28908 IX86_BUILTIN_PMINUD512,
28909 IX86_BUILTIN_PMINUQ512,
28910 IX86_BUILTIN_PMOVDB512,
28911 IX86_BUILTIN_PMOVDB512_MEM,
28912 IX86_BUILTIN_PMOVDW512,
28913 IX86_BUILTIN_PMOVDW512_MEM,
28914 IX86_BUILTIN_PMOVQB512,
28915 IX86_BUILTIN_PMOVQB512_MEM,
28916 IX86_BUILTIN_PMOVQD512,
28917 IX86_BUILTIN_PMOVQD512_MEM,
28918 IX86_BUILTIN_PMOVQW512,
28919 IX86_BUILTIN_PMOVQW512_MEM,
28920 IX86_BUILTIN_PMOVSDB512,
28921 IX86_BUILTIN_PMOVSDB512_MEM,
28922 IX86_BUILTIN_PMOVSDW512,
28923 IX86_BUILTIN_PMOVSDW512_MEM,
28924 IX86_BUILTIN_PMOVSQB512,
28925 IX86_BUILTIN_PMOVSQB512_MEM,
28926 IX86_BUILTIN_PMOVSQD512,
28927 IX86_BUILTIN_PMOVSQD512_MEM,
28928 IX86_BUILTIN_PMOVSQW512,
28929 IX86_BUILTIN_PMOVSQW512_MEM,
28930 IX86_BUILTIN_PMOVSXBD512,
28931 IX86_BUILTIN_PMOVSXBQ512,
28932 IX86_BUILTIN_PMOVSXDQ512,
28933 IX86_BUILTIN_PMOVSXWD512,
28934 IX86_BUILTIN_PMOVSXWQ512,
28935 IX86_BUILTIN_PMOVUSDB512,
28936 IX86_BUILTIN_PMOVUSDB512_MEM,
28937 IX86_BUILTIN_PMOVUSDW512,
28938 IX86_BUILTIN_PMOVUSDW512_MEM,
28939 IX86_BUILTIN_PMOVUSQB512,
28940 IX86_BUILTIN_PMOVUSQB512_MEM,
28941 IX86_BUILTIN_PMOVUSQD512,
28942 IX86_BUILTIN_PMOVUSQD512_MEM,
28943 IX86_BUILTIN_PMOVUSQW512,
28944 IX86_BUILTIN_PMOVUSQW512_MEM,
28945 IX86_BUILTIN_PMOVZXBD512,
28946 IX86_BUILTIN_PMOVZXBQ512,
28947 IX86_BUILTIN_PMOVZXDQ512,
28948 IX86_BUILTIN_PMOVZXWD512,
28949 IX86_BUILTIN_PMOVZXWQ512,
28950 IX86_BUILTIN_PMULDQ512,
28951 IX86_BUILTIN_PMULLD512,
28952 IX86_BUILTIN_PMULUDQ512,
28953 IX86_BUILTIN_PORD512,
28954 IX86_BUILTIN_PORQ512,
28955 IX86_BUILTIN_PROLD512,
28956 IX86_BUILTIN_PROLQ512,
28957 IX86_BUILTIN_PROLVD512,
28958 IX86_BUILTIN_PROLVQ512,
28959 IX86_BUILTIN_PRORD512,
28960 IX86_BUILTIN_PRORQ512,
28961 IX86_BUILTIN_PRORVD512,
28962 IX86_BUILTIN_PRORVQ512,
28963 IX86_BUILTIN_PSHUFD512,
28964 IX86_BUILTIN_PSLLD512,
28965 IX86_BUILTIN_PSLLDI512,
28966 IX86_BUILTIN_PSLLQ512,
28967 IX86_BUILTIN_PSLLQI512,
28968 IX86_BUILTIN_PSLLVV16SI,
28969 IX86_BUILTIN_PSLLVV8DI,
28970 IX86_BUILTIN_PSRAD512,
28971 IX86_BUILTIN_PSRADI512,
28972 IX86_BUILTIN_PSRAQ512,
28973 IX86_BUILTIN_PSRAQI512,
28974 IX86_BUILTIN_PSRAVV16SI,
28975 IX86_BUILTIN_PSRAVV8DI,
28976 IX86_BUILTIN_PSRLD512,
28977 IX86_BUILTIN_PSRLDI512,
28978 IX86_BUILTIN_PSRLQ512,
28979 IX86_BUILTIN_PSRLQI512,
28980 IX86_BUILTIN_PSRLVV16SI,
28981 IX86_BUILTIN_PSRLVV8DI,
28982 IX86_BUILTIN_PSUBD512,
28983 IX86_BUILTIN_PSUBQ512,
28984 IX86_BUILTIN_PTESTMD512,
28985 IX86_BUILTIN_PTESTMQ512,
28986 IX86_BUILTIN_PTESTNMD512,
28987 IX86_BUILTIN_PTESTNMQ512,
28988 IX86_BUILTIN_PUNPCKHDQ512,
28989 IX86_BUILTIN_PUNPCKHQDQ512,
28990 IX86_BUILTIN_PUNPCKLDQ512,
28991 IX86_BUILTIN_PUNPCKLQDQ512,
28992 IX86_BUILTIN_PXORD512,
28993 IX86_BUILTIN_PXORQ512,
28994 IX86_BUILTIN_RCP14PD512,
28995 IX86_BUILTIN_RCP14PS512,
28996 IX86_BUILTIN_RCP14SD,
28997 IX86_BUILTIN_RCP14SS,
28998 IX86_BUILTIN_RNDSCALEPD,
28999 IX86_BUILTIN_RNDSCALEPS,
29000 IX86_BUILTIN_RNDSCALESD,
29001 IX86_BUILTIN_RNDSCALESS,
29002 IX86_BUILTIN_RSQRT14PD512,
29003 IX86_BUILTIN_RSQRT14PS512,
29004 IX86_BUILTIN_RSQRT14SD,
29005 IX86_BUILTIN_RSQRT14SS,
29006 IX86_BUILTIN_SCALEFPD512,
29007 IX86_BUILTIN_SCALEFPS512,
29008 IX86_BUILTIN_SCALEFSD,
29009 IX86_BUILTIN_SCALEFSS,
29010 IX86_BUILTIN_SHUFPD512,
29011 IX86_BUILTIN_SHUFPS512,
29012 IX86_BUILTIN_SHUF_F32x4,
29013 IX86_BUILTIN_SHUF_F64x2,
29014 IX86_BUILTIN_SHUF_I32x4,
29015 IX86_BUILTIN_SHUF_I64x2,
29016 IX86_BUILTIN_SQRTPD512,
29017 IX86_BUILTIN_SQRTPD512_MASK,
29018 IX86_BUILTIN_SQRTPS512_MASK,
29019 IX86_BUILTIN_SQRTPS_NR512,
29020 IX86_BUILTIN_SQRTSD_ROUND,
29021 IX86_BUILTIN_SQRTSS_ROUND,
29022 IX86_BUILTIN_STOREAPD512,
29023 IX86_BUILTIN_STOREAPS512,
29024 IX86_BUILTIN_STOREDQUDI512,
29025 IX86_BUILTIN_STOREDQUSI512,
29026 IX86_BUILTIN_STOREUPD512,
29027 IX86_BUILTIN_STOREUPS512,
29028 IX86_BUILTIN_SUBPD512,
29029 IX86_BUILTIN_SUBPS512,
29030 IX86_BUILTIN_SUBSD_ROUND,
29031 IX86_BUILTIN_SUBSS_ROUND,
29032 IX86_BUILTIN_UCMPD512,
29033 IX86_BUILTIN_UCMPQ512,
29034 IX86_BUILTIN_UNPCKHPD512,
29035 IX86_BUILTIN_UNPCKHPS512,
29036 IX86_BUILTIN_UNPCKLPD512,
29037 IX86_BUILTIN_UNPCKLPS512,
29038 IX86_BUILTIN_VCVTSD2SI32,
29039 IX86_BUILTIN_VCVTSD2SI64,
29040 IX86_BUILTIN_VCVTSD2USI32,
29041 IX86_BUILTIN_VCVTSD2USI64,
29042 IX86_BUILTIN_VCVTSS2SI32,
29043 IX86_BUILTIN_VCVTSS2SI64,
29044 IX86_BUILTIN_VCVTSS2USI32,
29045 IX86_BUILTIN_VCVTSS2USI64,
29046 IX86_BUILTIN_VCVTTSD2SI32,
29047 IX86_BUILTIN_VCVTTSD2SI64,
29048 IX86_BUILTIN_VCVTTSD2USI32,
29049 IX86_BUILTIN_VCVTTSD2USI64,
29050 IX86_BUILTIN_VCVTTSS2SI32,
29051 IX86_BUILTIN_VCVTTSS2SI64,
29052 IX86_BUILTIN_VCVTTSS2USI32,
29053 IX86_BUILTIN_VCVTTSS2USI64,
29054 IX86_BUILTIN_VFMADDPD512_MASK,
29055 IX86_BUILTIN_VFMADDPD512_MASK3,
29056 IX86_BUILTIN_VFMADDPD512_MASKZ,
29057 IX86_BUILTIN_VFMADDPS512_MASK,
29058 IX86_BUILTIN_VFMADDPS512_MASK3,
29059 IX86_BUILTIN_VFMADDPS512_MASKZ,
29060 IX86_BUILTIN_VFMADDSD3_ROUND,
29061 IX86_BUILTIN_VFMADDSS3_ROUND,
29062 IX86_BUILTIN_VFMADDSUBPD512_MASK,
29063 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
29064 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
29065 IX86_BUILTIN_VFMADDSUBPS512_MASK,
29066 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
29067 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
29068 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
29069 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
29070 IX86_BUILTIN_VFMSUBPD512_MASK3,
29071 IX86_BUILTIN_VFMSUBPS512_MASK3,
29072 IX86_BUILTIN_VFMSUBSD3_MASK3,
29073 IX86_BUILTIN_VFMSUBSS3_MASK3,
29074 IX86_BUILTIN_VFNMADDPD512_MASK,
29075 IX86_BUILTIN_VFNMADDPS512_MASK,
29076 IX86_BUILTIN_VFNMSUBPD512_MASK,
29077 IX86_BUILTIN_VFNMSUBPD512_MASK3,
29078 IX86_BUILTIN_VFNMSUBPS512_MASK,
29079 IX86_BUILTIN_VFNMSUBPS512_MASK3,
29080 IX86_BUILTIN_VPCLZCNTD512,
29081 IX86_BUILTIN_VPCLZCNTQ512,
29082 IX86_BUILTIN_VPCONFLICTD512,
29083 IX86_BUILTIN_VPCONFLICTQ512,
29084 IX86_BUILTIN_VPERMDF512,
29085 IX86_BUILTIN_VPERMDI512,
29086 IX86_BUILTIN_VPERMI2VARD512,
29087 IX86_BUILTIN_VPERMI2VARPD512,
29088 IX86_BUILTIN_VPERMI2VARPS512,
29089 IX86_BUILTIN_VPERMI2VARQ512,
29090 IX86_BUILTIN_VPERMILPD512,
29091 IX86_BUILTIN_VPERMILPS512,
29092 IX86_BUILTIN_VPERMILVARPD512,
29093 IX86_BUILTIN_VPERMILVARPS512,
29094 IX86_BUILTIN_VPERMT2VARD512,
29095 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
29096 IX86_BUILTIN_VPERMT2VARPD512,
29097 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
29098 IX86_BUILTIN_VPERMT2VARPS512,
29099 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
29100 IX86_BUILTIN_VPERMT2VARQ512,
29101 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
29102 IX86_BUILTIN_VPERMVARDF512,
29103 IX86_BUILTIN_VPERMVARDI512,
29104 IX86_BUILTIN_VPERMVARSF512,
29105 IX86_BUILTIN_VPERMVARSI512,
29106 IX86_BUILTIN_VTERNLOGD512_MASK,
29107 IX86_BUILTIN_VTERNLOGD512_MASKZ,
29108 IX86_BUILTIN_VTERNLOGQ512_MASK,
29109 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
29111 /* Mask arithmetic operations */
29112 IX86_BUILTIN_KAND16,
29113 IX86_BUILTIN_KANDN16,
29114 IX86_BUILTIN_KNOT16,
29115 IX86_BUILTIN_KOR16,
29116 IX86_BUILTIN_KORTESTC16,
29117 IX86_BUILTIN_KORTESTZ16,
29118 IX86_BUILTIN_KUNPCKBW,
29119 IX86_BUILTIN_KXNOR16,
29120 IX86_BUILTIN_KXOR16,
29121 IX86_BUILTIN_KMOV16,
29124 IX86_BUILTIN_PMOVUSQD256_MEM,
29125 IX86_BUILTIN_PMOVUSQD128_MEM,
29126 IX86_BUILTIN_PMOVSQD256_MEM,
29127 IX86_BUILTIN_PMOVSQD128_MEM,
29128 IX86_BUILTIN_PMOVQD256_MEM,
29129 IX86_BUILTIN_PMOVQD128_MEM,
29130 IX86_BUILTIN_PMOVUSQW256_MEM,
29131 IX86_BUILTIN_PMOVUSQW128_MEM,
29132 IX86_BUILTIN_PMOVSQW256_MEM,
29133 IX86_BUILTIN_PMOVSQW128_MEM,
29134 IX86_BUILTIN_PMOVQW256_MEM,
29135 IX86_BUILTIN_PMOVQW128_MEM,
29136 IX86_BUILTIN_PMOVUSQB256_MEM,
29137 IX86_BUILTIN_PMOVUSQB128_MEM,
29138 IX86_BUILTIN_PMOVSQB256_MEM,
29139 IX86_BUILTIN_PMOVSQB128_MEM,
29140 IX86_BUILTIN_PMOVQB256_MEM,
29141 IX86_BUILTIN_PMOVQB128_MEM,
29142 IX86_BUILTIN_PMOVUSDW256_MEM,
29143 IX86_BUILTIN_PMOVUSDW128_MEM,
29144 IX86_BUILTIN_PMOVSDW256_MEM,
29145 IX86_BUILTIN_PMOVSDW128_MEM,
29146 IX86_BUILTIN_PMOVDW256_MEM,
29147 IX86_BUILTIN_PMOVDW128_MEM,
29148 IX86_BUILTIN_PMOVUSDB256_MEM,
29149 IX86_BUILTIN_PMOVUSDB128_MEM,
29150 IX86_BUILTIN_PMOVSDB256_MEM,
29151 IX86_BUILTIN_PMOVSDB128_MEM,
29152 IX86_BUILTIN_PMOVDB256_MEM,
29153 IX86_BUILTIN_PMOVDB128_MEM,
29154 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29155 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29156 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29157 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29158 IX86_BUILTIN_MOVDQA64STORE256_MASK,
29159 IX86_BUILTIN_MOVDQA64STORE128_MASK,
29160 IX86_BUILTIN_MOVDQA32STORE256_MASK,
29161 IX86_BUILTIN_MOVDQA32STORE128_MASK,
29162 IX86_BUILTIN_LOADAPD256_MASK,
29163 IX86_BUILTIN_LOADAPD128_MASK,
29164 IX86_BUILTIN_LOADAPS256_MASK,
29165 IX86_BUILTIN_LOADAPS128_MASK,
29166 IX86_BUILTIN_STOREAPD256_MASK,
29167 IX86_BUILTIN_STOREAPD128_MASK,
29168 IX86_BUILTIN_STOREAPS256_MASK,
29169 IX86_BUILTIN_STOREAPS128_MASK,
29170 IX86_BUILTIN_LOADUPD256_MASK,
29171 IX86_BUILTIN_LOADUPD128_MASK,
29172 IX86_BUILTIN_LOADUPS256_MASK,
29173 IX86_BUILTIN_LOADUPS128_MASK,
29174 IX86_BUILTIN_STOREUPD256_MASK,
29175 IX86_BUILTIN_STOREUPD128_MASK,
29176 IX86_BUILTIN_STOREUPS256_MASK,
29177 IX86_BUILTIN_STOREUPS128_MASK,
29178 IX86_BUILTIN_LOADDQUDI256_MASK,
29179 IX86_BUILTIN_LOADDQUDI128_MASK,
29180 IX86_BUILTIN_LOADDQUSI256_MASK,
29181 IX86_BUILTIN_LOADDQUSI128_MASK,
29182 IX86_BUILTIN_LOADDQUHI256_MASK,
29183 IX86_BUILTIN_LOADDQUHI128_MASK,
29184 IX86_BUILTIN_LOADDQUQI256_MASK,
29185 IX86_BUILTIN_LOADDQUQI128_MASK,
29186 IX86_BUILTIN_STOREDQUDI256_MASK,
29187 IX86_BUILTIN_STOREDQUDI128_MASK,
29188 IX86_BUILTIN_STOREDQUSI256_MASK,
29189 IX86_BUILTIN_STOREDQUSI128_MASK,
29190 IX86_BUILTIN_STOREDQUHI256_MASK,
29191 IX86_BUILTIN_STOREDQUHI128_MASK,
29192 IX86_BUILTIN_STOREDQUQI256_MASK,
29193 IX86_BUILTIN_STOREDQUQI128_MASK,
29194 IX86_BUILTIN_COMPRESSPDSTORE256,
29195 IX86_BUILTIN_COMPRESSPDSTORE128,
29196 IX86_BUILTIN_COMPRESSPSSTORE256,
29197 IX86_BUILTIN_COMPRESSPSSTORE128,
29198 IX86_BUILTIN_PCOMPRESSQSTORE256,
29199 IX86_BUILTIN_PCOMPRESSQSTORE128,
29200 IX86_BUILTIN_PCOMPRESSDSTORE256,
29201 IX86_BUILTIN_PCOMPRESSDSTORE128,
29202 IX86_BUILTIN_EXPANDPDLOAD256,
29203 IX86_BUILTIN_EXPANDPDLOAD128,
29204 IX86_BUILTIN_EXPANDPSLOAD256,
29205 IX86_BUILTIN_EXPANDPSLOAD128,
29206 IX86_BUILTIN_PEXPANDQLOAD256,
29207 IX86_BUILTIN_PEXPANDQLOAD128,
29208 IX86_BUILTIN_PEXPANDDLOAD256,
29209 IX86_BUILTIN_PEXPANDDLOAD128,
29210 IX86_BUILTIN_EXPANDPDLOAD256Z,
29211 IX86_BUILTIN_EXPANDPDLOAD128Z,
29212 IX86_BUILTIN_EXPANDPSLOAD256Z,
29213 IX86_BUILTIN_EXPANDPSLOAD128Z,
29214 IX86_BUILTIN_PEXPANDQLOAD256Z,
29215 IX86_BUILTIN_PEXPANDQLOAD128Z,
29216 IX86_BUILTIN_PEXPANDDLOAD256Z,
29217 IX86_BUILTIN_PEXPANDDLOAD128Z,
29218 IX86_BUILTIN_PALIGNR256_MASK,
29219 IX86_BUILTIN_PALIGNR128_MASK,
29220 IX86_BUILTIN_MOVDQA64_256_MASK,
29221 IX86_BUILTIN_MOVDQA64_128_MASK,
29222 IX86_BUILTIN_MOVDQA32_256_MASK,
29223 IX86_BUILTIN_MOVDQA32_128_MASK,
29224 IX86_BUILTIN_MOVAPD256_MASK,
29225 IX86_BUILTIN_MOVAPD128_MASK,
29226 IX86_BUILTIN_MOVAPS256_MASK,
29227 IX86_BUILTIN_MOVAPS128_MASK,
29228 IX86_BUILTIN_MOVDQUHI256_MASK,
29229 IX86_BUILTIN_MOVDQUHI128_MASK,
29230 IX86_BUILTIN_MOVDQUQI256_MASK,
29231 IX86_BUILTIN_MOVDQUQI128_MASK,
29232 IX86_BUILTIN_MINPS128_MASK,
29233 IX86_BUILTIN_MAXPS128_MASK,
29234 IX86_BUILTIN_MINPD128_MASK,
29235 IX86_BUILTIN_MAXPD128_MASK,
29236 IX86_BUILTIN_MAXPD256_MASK,
29237 IX86_BUILTIN_MAXPS256_MASK,
29238 IX86_BUILTIN_MINPD256_MASK,
29239 IX86_BUILTIN_MINPS256_MASK,
29240 IX86_BUILTIN_MULPS128_MASK,
29241 IX86_BUILTIN_DIVPS128_MASK,
29242 IX86_BUILTIN_MULPD128_MASK,
29243 IX86_BUILTIN_DIVPD128_MASK,
29244 IX86_BUILTIN_DIVPD256_MASK,
29245 IX86_BUILTIN_DIVPS256_MASK,
29246 IX86_BUILTIN_MULPD256_MASK,
29247 IX86_BUILTIN_MULPS256_MASK,
29248 IX86_BUILTIN_ADDPD128_MASK,
29249 IX86_BUILTIN_ADDPD256_MASK,
29250 IX86_BUILTIN_ADDPS128_MASK,
29251 IX86_BUILTIN_ADDPS256_MASK,
29252 IX86_BUILTIN_SUBPD128_MASK,
29253 IX86_BUILTIN_SUBPD256_MASK,
29254 IX86_BUILTIN_SUBPS128_MASK,
29255 IX86_BUILTIN_SUBPS256_MASK,
29256 IX86_BUILTIN_XORPD256_MASK,
29257 IX86_BUILTIN_XORPD128_MASK,
29258 IX86_BUILTIN_XORPS256_MASK,
29259 IX86_BUILTIN_XORPS128_MASK,
29260 IX86_BUILTIN_ORPD256_MASK,
29261 IX86_BUILTIN_ORPD128_MASK,
29262 IX86_BUILTIN_ORPS256_MASK,
29263 IX86_BUILTIN_ORPS128_MASK,
29264 IX86_BUILTIN_BROADCASTF32x2_256,
29265 IX86_BUILTIN_BROADCASTI32x2_256,
29266 IX86_BUILTIN_BROADCASTI32x2_128,
29267 IX86_BUILTIN_BROADCASTF64X2_256,
29268 IX86_BUILTIN_BROADCASTI64X2_256,
29269 IX86_BUILTIN_BROADCASTF32X4_256,
29270 IX86_BUILTIN_BROADCASTI32X4_256,
29271 IX86_BUILTIN_EXTRACTF32X4_256,
29272 IX86_BUILTIN_EXTRACTI32X4_256,
29273 IX86_BUILTIN_DBPSADBW256,
29274 IX86_BUILTIN_DBPSADBW128,
29275 IX86_BUILTIN_CVTTPD2QQ256,
29276 IX86_BUILTIN_CVTTPD2QQ128,
29277 IX86_BUILTIN_CVTTPD2UQQ256,
29278 IX86_BUILTIN_CVTTPD2UQQ128,
29279 IX86_BUILTIN_CVTPD2QQ256,
29280 IX86_BUILTIN_CVTPD2QQ128,
29281 IX86_BUILTIN_CVTPD2UQQ256,
29282 IX86_BUILTIN_CVTPD2UQQ128,
29283 IX86_BUILTIN_CVTPD2UDQ256_MASK,
29284 IX86_BUILTIN_CVTPD2UDQ128_MASK,
29285 IX86_BUILTIN_CVTTPS2QQ256,
29286 IX86_BUILTIN_CVTTPS2QQ128,
29287 IX86_BUILTIN_CVTTPS2UQQ256,
29288 IX86_BUILTIN_CVTTPS2UQQ128,
29289 IX86_BUILTIN_CVTTPS2DQ256_MASK,
29290 IX86_BUILTIN_CVTTPS2DQ128_MASK,
29291 IX86_BUILTIN_CVTTPS2UDQ256,
29292 IX86_BUILTIN_CVTTPS2UDQ128,
29293 IX86_BUILTIN_CVTTPD2DQ256_MASK,
29294 IX86_BUILTIN_CVTTPD2DQ128_MASK,
29295 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29296 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29297 IX86_BUILTIN_CVTPD2DQ256_MASK,
29298 IX86_BUILTIN_CVTPD2DQ128_MASK,
29299 IX86_BUILTIN_CVTDQ2PD256_MASK,
29300 IX86_BUILTIN_CVTDQ2PD128_MASK,
29301 IX86_BUILTIN_CVTUDQ2PD256_MASK,
29302 IX86_BUILTIN_CVTUDQ2PD128_MASK,
29303 IX86_BUILTIN_CVTDQ2PS256_MASK,
29304 IX86_BUILTIN_CVTDQ2PS128_MASK,
29305 IX86_BUILTIN_CVTUDQ2PS256_MASK,
29306 IX86_BUILTIN_CVTUDQ2PS128_MASK,
29307 IX86_BUILTIN_CVTPS2PD256_MASK,
29308 IX86_BUILTIN_CVTPS2PD128_MASK,
29309 IX86_BUILTIN_PBROADCASTB256_MASK,
29310 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29311 IX86_BUILTIN_PBROADCASTB128_MASK,
29312 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29313 IX86_BUILTIN_PBROADCASTW256_MASK,
29314 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29315 IX86_BUILTIN_PBROADCASTW128_MASK,
29316 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29317 IX86_BUILTIN_PBROADCASTD256_MASK,
29318 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29319 IX86_BUILTIN_PBROADCASTD128_MASK,
29320 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29321 IX86_BUILTIN_PBROADCASTQ256_MASK,
29322 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29323 IX86_BUILTIN_PBROADCASTQ128_MASK,
29324 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29325 IX86_BUILTIN_BROADCASTSS256,
29326 IX86_BUILTIN_BROADCASTSS128,
29327 IX86_BUILTIN_BROADCASTSD256,
29328 IX86_BUILTIN_EXTRACTF64X2_256,
29329 IX86_BUILTIN_EXTRACTI64X2_256,
29330 IX86_BUILTIN_INSERTF32X4_256,
29331 IX86_BUILTIN_INSERTI32X4_256,
29332 IX86_BUILTIN_PMOVSXBW256_MASK,
29333 IX86_BUILTIN_PMOVSXBW128_MASK,
29334 IX86_BUILTIN_PMOVSXBD256_MASK,
29335 IX86_BUILTIN_PMOVSXBD128_MASK,
29336 IX86_BUILTIN_PMOVSXBQ256_MASK,
29337 IX86_BUILTIN_PMOVSXBQ128_MASK,
29338 IX86_BUILTIN_PMOVSXWD256_MASK,
29339 IX86_BUILTIN_PMOVSXWD128_MASK,
29340 IX86_BUILTIN_PMOVSXWQ256_MASK,
29341 IX86_BUILTIN_PMOVSXWQ128_MASK,
29342 IX86_BUILTIN_PMOVSXDQ256_MASK,
29343 IX86_BUILTIN_PMOVSXDQ128_MASK,
29344 IX86_BUILTIN_PMOVZXBW256_MASK,
29345 IX86_BUILTIN_PMOVZXBW128_MASK,
29346 IX86_BUILTIN_PMOVZXBD256_MASK,
29347 IX86_BUILTIN_PMOVZXBD128_MASK,
29348 IX86_BUILTIN_PMOVZXBQ256_MASK,
29349 IX86_BUILTIN_PMOVZXBQ128_MASK,
29350 IX86_BUILTIN_PMOVZXWD256_MASK,
29351 IX86_BUILTIN_PMOVZXWD128_MASK,
29352 IX86_BUILTIN_PMOVZXWQ256_MASK,
29353 IX86_BUILTIN_PMOVZXWQ128_MASK,
29354 IX86_BUILTIN_PMOVZXDQ256_MASK,
29355 IX86_BUILTIN_PMOVZXDQ128_MASK,
29356 IX86_BUILTIN_REDUCEPD256_MASK,
29357 IX86_BUILTIN_REDUCEPD128_MASK,
29358 IX86_BUILTIN_REDUCEPS256_MASK,
29359 IX86_BUILTIN_REDUCEPS128_MASK,
29360 IX86_BUILTIN_REDUCESD_MASK,
29361 IX86_BUILTIN_REDUCESS_MASK,
29362 IX86_BUILTIN_VPERMVARHI256_MASK,
29363 IX86_BUILTIN_VPERMVARHI128_MASK,
29364 IX86_BUILTIN_VPERMT2VARHI256,
29365 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29366 IX86_BUILTIN_VPERMT2VARHI128,
29367 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29368 IX86_BUILTIN_VPERMI2VARHI256,
29369 IX86_BUILTIN_VPERMI2VARHI128,
29370 IX86_BUILTIN_RCP14PD256,
29371 IX86_BUILTIN_RCP14PD128,
29372 IX86_BUILTIN_RCP14PS256,
29373 IX86_BUILTIN_RCP14PS128,
29374 IX86_BUILTIN_RSQRT14PD256_MASK,
29375 IX86_BUILTIN_RSQRT14PD128_MASK,
29376 IX86_BUILTIN_RSQRT14PS256_MASK,
29377 IX86_BUILTIN_RSQRT14PS128_MASK,
29378 IX86_BUILTIN_SQRTPD256_MASK,
29379 IX86_BUILTIN_SQRTPD128_MASK,
29380 IX86_BUILTIN_SQRTPS256_MASK,
29381 IX86_BUILTIN_SQRTPS128_MASK,
29382 IX86_BUILTIN_PADDB128_MASK,
29383 IX86_BUILTIN_PADDW128_MASK,
29384 IX86_BUILTIN_PADDD128_MASK,
29385 IX86_BUILTIN_PADDQ128_MASK,
29386 IX86_BUILTIN_PSUBB128_MASK,
29387 IX86_BUILTIN_PSUBW128_MASK,
29388 IX86_BUILTIN_PSUBD128_MASK,
29389 IX86_BUILTIN_PSUBQ128_MASK,
29390 IX86_BUILTIN_PADDSB128_MASK,
29391 IX86_BUILTIN_PADDSW128_MASK,
29392 IX86_BUILTIN_PSUBSB128_MASK,
29393 IX86_BUILTIN_PSUBSW128_MASK,
29394 IX86_BUILTIN_PADDUSB128_MASK,
29395 IX86_BUILTIN_PADDUSW128_MASK,
29396 IX86_BUILTIN_PSUBUSB128_MASK,
29397 IX86_BUILTIN_PSUBUSW128_MASK,
29398 IX86_BUILTIN_PADDB256_MASK,
29399 IX86_BUILTIN_PADDW256_MASK,
29400 IX86_BUILTIN_PADDD256_MASK,
29401 IX86_BUILTIN_PADDQ256_MASK,
29402 IX86_BUILTIN_PADDSB256_MASK,
29403 IX86_BUILTIN_PADDSW256_MASK,
29404 IX86_BUILTIN_PADDUSB256_MASK,
29405 IX86_BUILTIN_PADDUSW256_MASK,
29406 IX86_BUILTIN_PSUBB256_MASK,
29407 IX86_BUILTIN_PSUBW256_MASK,
29408 IX86_BUILTIN_PSUBD256_MASK,
29409 IX86_BUILTIN_PSUBQ256_MASK,
29410 IX86_BUILTIN_PSUBSB256_MASK,
29411 IX86_BUILTIN_PSUBSW256_MASK,
29412 IX86_BUILTIN_PSUBUSB256_MASK,
29413 IX86_BUILTIN_PSUBUSW256_MASK,
29414 IX86_BUILTIN_SHUF_F64x2_256,
29415 IX86_BUILTIN_SHUF_I64x2_256,
29416 IX86_BUILTIN_SHUF_I32x4_256,
29417 IX86_BUILTIN_SHUF_F32x4_256,
29418 IX86_BUILTIN_PMOVWB128,
29419 IX86_BUILTIN_PMOVWB256,
29420 IX86_BUILTIN_PMOVSWB128,
29421 IX86_BUILTIN_PMOVSWB256,
29422 IX86_BUILTIN_PMOVUSWB128,
29423 IX86_BUILTIN_PMOVUSWB256,
29424 IX86_BUILTIN_PMOVDB128,
29425 IX86_BUILTIN_PMOVDB256,
29426 IX86_BUILTIN_PMOVSDB128,
29427 IX86_BUILTIN_PMOVSDB256,
29428 IX86_BUILTIN_PMOVUSDB128,
29429 IX86_BUILTIN_PMOVUSDB256,
29430 IX86_BUILTIN_PMOVDW128,
29431 IX86_BUILTIN_PMOVDW256,
29432 IX86_BUILTIN_PMOVSDW128,
29433 IX86_BUILTIN_PMOVSDW256,
29434 IX86_BUILTIN_PMOVUSDW128,
29435 IX86_BUILTIN_PMOVUSDW256,
29436 IX86_BUILTIN_PMOVQB128,
29437 IX86_BUILTIN_PMOVQB256,
29438 IX86_BUILTIN_PMOVSQB128,
29439 IX86_BUILTIN_PMOVSQB256,
29440 IX86_BUILTIN_PMOVUSQB128,
29441 IX86_BUILTIN_PMOVUSQB256,
29442 IX86_BUILTIN_PMOVQW128,
29443 IX86_BUILTIN_PMOVQW256,
29444 IX86_BUILTIN_PMOVSQW128,
29445 IX86_BUILTIN_PMOVSQW256,
29446 IX86_BUILTIN_PMOVUSQW128,
29447 IX86_BUILTIN_PMOVUSQW256,
29448 IX86_BUILTIN_PMOVQD128,
29449 IX86_BUILTIN_PMOVQD256,
29450 IX86_BUILTIN_PMOVSQD128,
29451 IX86_BUILTIN_PMOVSQD256,
29452 IX86_BUILTIN_PMOVUSQD128,
29453 IX86_BUILTIN_PMOVUSQD256,
29454 IX86_BUILTIN_RANGEPD256,
29455 IX86_BUILTIN_RANGEPD128,
29456 IX86_BUILTIN_RANGEPS256,
29457 IX86_BUILTIN_RANGEPS128,
29458 IX86_BUILTIN_GETEXPPS256,
29459 IX86_BUILTIN_GETEXPPD256,
29460 IX86_BUILTIN_GETEXPPS128,
29461 IX86_BUILTIN_GETEXPPD128,
29462 IX86_BUILTIN_FIXUPIMMPD256_MASK,
29463 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29464 IX86_BUILTIN_FIXUPIMMPS256_MASK,
29465 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29466 IX86_BUILTIN_FIXUPIMMPD128_MASK,
29467 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29468 IX86_BUILTIN_FIXUPIMMPS128_MASK,
29469 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29470 IX86_BUILTIN_PABSQ256,
29471 IX86_BUILTIN_PABSQ128,
29472 IX86_BUILTIN_PABSD256_MASK,
29473 IX86_BUILTIN_PABSD128_MASK,
29474 IX86_BUILTIN_PMULHRSW256_MASK,
29475 IX86_BUILTIN_PMULHRSW128_MASK,
29476 IX86_BUILTIN_PMULHUW128_MASK,
29477 IX86_BUILTIN_PMULHUW256_MASK,
29478 IX86_BUILTIN_PMULHW256_MASK,
29479 IX86_BUILTIN_PMULHW128_MASK,
29480 IX86_BUILTIN_PMULLW256_MASK,
29481 IX86_BUILTIN_PMULLW128_MASK,
29482 IX86_BUILTIN_PMULLQ256,
29483 IX86_BUILTIN_PMULLQ128,
29484 IX86_BUILTIN_ANDPD256_MASK,
29485 IX86_BUILTIN_ANDPD128_MASK,
29486 IX86_BUILTIN_ANDPS256_MASK,
29487 IX86_BUILTIN_ANDPS128_MASK,
29488 IX86_BUILTIN_ANDNPD256_MASK,
29489 IX86_BUILTIN_ANDNPD128_MASK,
29490 IX86_BUILTIN_ANDNPS256_MASK,
29491 IX86_BUILTIN_ANDNPS128_MASK,
29492 IX86_BUILTIN_PSLLWI128_MASK,
29493 IX86_BUILTIN_PSLLDI128_MASK,
29494 IX86_BUILTIN_PSLLQI128_MASK,
29495 IX86_BUILTIN_PSLLW128_MASK,
29496 IX86_BUILTIN_PSLLD128_MASK,
29497 IX86_BUILTIN_PSLLQ128_MASK,
29498 IX86_BUILTIN_PSLLWI256_MASK ,
29499 IX86_BUILTIN_PSLLW256_MASK,
29500 IX86_BUILTIN_PSLLDI256_MASK,
29501 IX86_BUILTIN_PSLLD256_MASK,
29502 IX86_BUILTIN_PSLLQI256_MASK,
29503 IX86_BUILTIN_PSLLQ256_MASK,
29504 IX86_BUILTIN_PSRADI128_MASK,
29505 IX86_BUILTIN_PSRAD128_MASK,
29506 IX86_BUILTIN_PSRADI256_MASK,
29507 IX86_BUILTIN_PSRAD256_MASK,
29508 IX86_BUILTIN_PSRAQI128_MASK,
29509 IX86_BUILTIN_PSRAQ128_MASK,
29510 IX86_BUILTIN_PSRAQI256_MASK,
29511 IX86_BUILTIN_PSRAQ256_MASK,
29512 IX86_BUILTIN_PANDD256,
29513 IX86_BUILTIN_PANDD128,
29514 IX86_BUILTIN_PSRLDI128_MASK,
29515 IX86_BUILTIN_PSRLD128_MASK,
29516 IX86_BUILTIN_PSRLDI256_MASK,
29517 IX86_BUILTIN_PSRLD256_MASK,
29518 IX86_BUILTIN_PSRLQI128_MASK,
29519 IX86_BUILTIN_PSRLQ128_MASK,
29520 IX86_BUILTIN_PSRLQI256_MASK,
29521 IX86_BUILTIN_PSRLQ256_MASK,
29522 IX86_BUILTIN_PANDQ256,
29523 IX86_BUILTIN_PANDQ128,
29524 IX86_BUILTIN_PANDND256,
29525 IX86_BUILTIN_PANDND128,
29526 IX86_BUILTIN_PANDNQ256,
29527 IX86_BUILTIN_PANDNQ128,
29528 IX86_BUILTIN_PORD256,
29529 IX86_BUILTIN_PORD128,
29530 IX86_BUILTIN_PORQ256,
29531 IX86_BUILTIN_PORQ128,
29532 IX86_BUILTIN_PXORD256,
29533 IX86_BUILTIN_PXORD128,
29534 IX86_BUILTIN_PXORQ256,
29535 IX86_BUILTIN_PXORQ128,
29536 IX86_BUILTIN_PACKSSWB256_MASK,
29537 IX86_BUILTIN_PACKSSWB128_MASK,
29538 IX86_BUILTIN_PACKUSWB256_MASK,
29539 IX86_BUILTIN_PACKUSWB128_MASK,
29540 IX86_BUILTIN_RNDSCALEPS256,
29541 IX86_BUILTIN_RNDSCALEPD256,
29542 IX86_BUILTIN_RNDSCALEPS128,
29543 IX86_BUILTIN_RNDSCALEPD128,
29544 IX86_BUILTIN_VTERNLOGQ256_MASK,
29545 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29546 IX86_BUILTIN_VTERNLOGD256_MASK,
29547 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29548 IX86_BUILTIN_VTERNLOGQ128_MASK,
29549 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29550 IX86_BUILTIN_VTERNLOGD128_MASK,
29551 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29552 IX86_BUILTIN_SCALEFPD256,
29553 IX86_BUILTIN_SCALEFPS256,
29554 IX86_BUILTIN_SCALEFPD128,
29555 IX86_BUILTIN_SCALEFPS128,
29556 IX86_BUILTIN_VFMADDPD256_MASK,
29557 IX86_BUILTIN_VFMADDPD256_MASK3,
29558 IX86_BUILTIN_VFMADDPD256_MASKZ,
29559 IX86_BUILTIN_VFMADDPD128_MASK,
29560 IX86_BUILTIN_VFMADDPD128_MASK3,
29561 IX86_BUILTIN_VFMADDPD128_MASKZ,
29562 IX86_BUILTIN_VFMADDPS256_MASK,
29563 IX86_BUILTIN_VFMADDPS256_MASK3,
29564 IX86_BUILTIN_VFMADDPS256_MASKZ,
29565 IX86_BUILTIN_VFMADDPS128_MASK,
29566 IX86_BUILTIN_VFMADDPS128_MASK3,
29567 IX86_BUILTIN_VFMADDPS128_MASKZ,
29568 IX86_BUILTIN_VFMSUBPD256_MASK3,
29569 IX86_BUILTIN_VFMSUBPD128_MASK3,
29570 IX86_BUILTIN_VFMSUBPS256_MASK3,
29571 IX86_BUILTIN_VFMSUBPS128_MASK3,
29572 IX86_BUILTIN_VFNMADDPD256_MASK,
29573 IX86_BUILTIN_VFNMADDPD128_MASK,
29574 IX86_BUILTIN_VFNMADDPS256_MASK,
29575 IX86_BUILTIN_VFNMADDPS128_MASK,
29576 IX86_BUILTIN_VFNMSUBPD256_MASK,
29577 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29578 IX86_BUILTIN_VFNMSUBPD128_MASK,
29579 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29580 IX86_BUILTIN_VFNMSUBPS256_MASK,
29581 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29582 IX86_BUILTIN_VFNMSUBPS128_MASK,
29583 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29584 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29585 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29586 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29587 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29588 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29589 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29590 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29591 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29592 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29593 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29594 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29595 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29596 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29597 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29598 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29599 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29600 IX86_BUILTIN_INSERTF64X2_256,
29601 IX86_BUILTIN_INSERTI64X2_256,
29602 IX86_BUILTIN_PSRAVV16HI,
29603 IX86_BUILTIN_PSRAVV8HI,
29604 IX86_BUILTIN_PMADDUBSW256_MASK,
29605 IX86_BUILTIN_PMADDUBSW128_MASK,
29606 IX86_BUILTIN_PMADDWD256_MASK,
29607 IX86_BUILTIN_PMADDWD128_MASK,
29608 IX86_BUILTIN_PSRLVV16HI,
29609 IX86_BUILTIN_PSRLVV8HI,
29610 IX86_BUILTIN_CVTPS2DQ256_MASK,
29611 IX86_BUILTIN_CVTPS2DQ128_MASK,
29612 IX86_BUILTIN_CVTPS2UDQ256,
29613 IX86_BUILTIN_CVTPS2UDQ128,
29614 IX86_BUILTIN_CVTPS2QQ256,
29615 IX86_BUILTIN_CVTPS2QQ128,
29616 IX86_BUILTIN_CVTPS2UQQ256,
29617 IX86_BUILTIN_CVTPS2UQQ128,
29618 IX86_BUILTIN_GETMANTPS256,
29619 IX86_BUILTIN_GETMANTPS128,
29620 IX86_BUILTIN_GETMANTPD256,
29621 IX86_BUILTIN_GETMANTPD128,
29622 IX86_BUILTIN_MOVDDUP256_MASK,
29623 IX86_BUILTIN_MOVDDUP128_MASK,
29624 IX86_BUILTIN_MOVSHDUP256_MASK,
29625 IX86_BUILTIN_MOVSHDUP128_MASK,
29626 IX86_BUILTIN_MOVSLDUP256_MASK,
29627 IX86_BUILTIN_MOVSLDUP128_MASK,
29628 IX86_BUILTIN_CVTQQ2PS256,
29629 IX86_BUILTIN_CVTQQ2PS128,
29630 IX86_BUILTIN_CVTUQQ2PS256,
29631 IX86_BUILTIN_CVTUQQ2PS128,
29632 IX86_BUILTIN_CVTQQ2PD256,
29633 IX86_BUILTIN_CVTQQ2PD128,
29634 IX86_BUILTIN_CVTUQQ2PD256,
29635 IX86_BUILTIN_CVTUQQ2PD128,
29636 IX86_BUILTIN_VPERMT2VARQ256,
29637 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29638 IX86_BUILTIN_VPERMT2VARD256,
29639 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29640 IX86_BUILTIN_VPERMI2VARQ256,
29641 IX86_BUILTIN_VPERMI2VARD256,
29642 IX86_BUILTIN_VPERMT2VARPD256,
29643 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29644 IX86_BUILTIN_VPERMT2VARPS256,
29645 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29646 IX86_BUILTIN_VPERMI2VARPD256,
29647 IX86_BUILTIN_VPERMI2VARPS256,
29648 IX86_BUILTIN_VPERMT2VARQ128,
29649 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29650 IX86_BUILTIN_VPERMT2VARD128,
29651 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29652 IX86_BUILTIN_VPERMI2VARQ128,
29653 IX86_BUILTIN_VPERMI2VARD128,
29654 IX86_BUILTIN_VPERMT2VARPD128,
29655 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29656 IX86_BUILTIN_VPERMT2VARPS128,
29657 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29658 IX86_BUILTIN_VPERMI2VARPD128,
29659 IX86_BUILTIN_VPERMI2VARPS128,
29660 IX86_BUILTIN_PSHUFB256_MASK,
29661 IX86_BUILTIN_PSHUFB128_MASK,
29662 IX86_BUILTIN_PSHUFHW256_MASK,
29663 IX86_BUILTIN_PSHUFHW128_MASK,
29664 IX86_BUILTIN_PSHUFLW256_MASK,
29665 IX86_BUILTIN_PSHUFLW128_MASK,
29666 IX86_BUILTIN_PSHUFD256_MASK,
29667 IX86_BUILTIN_PSHUFD128_MASK,
29668 IX86_BUILTIN_SHUFPD256_MASK,
29669 IX86_BUILTIN_SHUFPD128_MASK,
29670 IX86_BUILTIN_SHUFPS256_MASK,
29671 IX86_BUILTIN_SHUFPS128_MASK,
29672 IX86_BUILTIN_PROLVQ256,
29673 IX86_BUILTIN_PROLVQ128,
29674 IX86_BUILTIN_PROLQ256,
29675 IX86_BUILTIN_PROLQ128,
29676 IX86_BUILTIN_PRORVQ256,
29677 IX86_BUILTIN_PRORVQ128,
29678 IX86_BUILTIN_PRORQ256,
29679 IX86_BUILTIN_PRORQ128,
29680 IX86_BUILTIN_PSRAVQ128,
29681 IX86_BUILTIN_PSRAVQ256,
29682 IX86_BUILTIN_PSLLVV4DI_MASK,
29683 IX86_BUILTIN_PSLLVV2DI_MASK,
29684 IX86_BUILTIN_PSLLVV8SI_MASK,
29685 IX86_BUILTIN_PSLLVV4SI_MASK,
29686 IX86_BUILTIN_PSRAVV8SI_MASK,
29687 IX86_BUILTIN_PSRAVV4SI_MASK,
29688 IX86_BUILTIN_PSRLVV4DI_MASK,
29689 IX86_BUILTIN_PSRLVV2DI_MASK,
29690 IX86_BUILTIN_PSRLVV8SI_MASK,
29691 IX86_BUILTIN_PSRLVV4SI_MASK,
29692 IX86_BUILTIN_PSRAWI256_MASK,
29693 IX86_BUILTIN_PSRAW256_MASK,
29694 IX86_BUILTIN_PSRAWI128_MASK,
29695 IX86_BUILTIN_PSRAW128_MASK,
29696 IX86_BUILTIN_PSRLWI256_MASK,
29697 IX86_BUILTIN_PSRLW256_MASK,
29698 IX86_BUILTIN_PSRLWI128_MASK,
29699 IX86_BUILTIN_PSRLW128_MASK,
29700 IX86_BUILTIN_PRORVD256,
29701 IX86_BUILTIN_PROLVD256,
29702 IX86_BUILTIN_PRORD256,
29703 IX86_BUILTIN_PROLD256,
29704 IX86_BUILTIN_PRORVD128,
29705 IX86_BUILTIN_PROLVD128,
29706 IX86_BUILTIN_PRORD128,
29707 IX86_BUILTIN_PROLD128,
29708 IX86_BUILTIN_FPCLASSPD256,
29709 IX86_BUILTIN_FPCLASSPD128,
29710 IX86_BUILTIN_FPCLASSSD,
29711 IX86_BUILTIN_FPCLASSPS256,
29712 IX86_BUILTIN_FPCLASSPS128,
29713 IX86_BUILTIN_FPCLASSSS,
29714 IX86_BUILTIN_CVTB2MASK128,
29715 IX86_BUILTIN_CVTB2MASK256,
29716 IX86_BUILTIN_CVTW2MASK128,
29717 IX86_BUILTIN_CVTW2MASK256,
29718 IX86_BUILTIN_CVTD2MASK128,
29719 IX86_BUILTIN_CVTD2MASK256,
29720 IX86_BUILTIN_CVTQ2MASK128,
29721 IX86_BUILTIN_CVTQ2MASK256,
29722 IX86_BUILTIN_CVTMASK2B128,
29723 IX86_BUILTIN_CVTMASK2B256,
29724 IX86_BUILTIN_CVTMASK2W128,
29725 IX86_BUILTIN_CVTMASK2W256,
29726 IX86_BUILTIN_CVTMASK2D128,
29727 IX86_BUILTIN_CVTMASK2D256,
29728 IX86_BUILTIN_CVTMASK2Q128,
29729 IX86_BUILTIN_CVTMASK2Q256,
29730 IX86_BUILTIN_PCMPEQB128_MASK,
29731 IX86_BUILTIN_PCMPEQB256_MASK,
29732 IX86_BUILTIN_PCMPEQW128_MASK,
29733 IX86_BUILTIN_PCMPEQW256_MASK,
29734 IX86_BUILTIN_PCMPEQD128_MASK,
29735 IX86_BUILTIN_PCMPEQD256_MASK,
29736 IX86_BUILTIN_PCMPEQQ128_MASK,
29737 IX86_BUILTIN_PCMPEQQ256_MASK,
29738 IX86_BUILTIN_PCMPGTB128_MASK,
29739 IX86_BUILTIN_PCMPGTB256_MASK,
29740 IX86_BUILTIN_PCMPGTW128_MASK,
29741 IX86_BUILTIN_PCMPGTW256_MASK,
29742 IX86_BUILTIN_PCMPGTD128_MASK,
29743 IX86_BUILTIN_PCMPGTD256_MASK,
29744 IX86_BUILTIN_PCMPGTQ128_MASK,
29745 IX86_BUILTIN_PCMPGTQ256_MASK,
29746 IX86_BUILTIN_PTESTMB128,
29747 IX86_BUILTIN_PTESTMB256,
29748 IX86_BUILTIN_PTESTMW128,
29749 IX86_BUILTIN_PTESTMW256,
29750 IX86_BUILTIN_PTESTMD128,
29751 IX86_BUILTIN_PTESTMD256,
29752 IX86_BUILTIN_PTESTMQ128,
29753 IX86_BUILTIN_PTESTMQ256,
29754 IX86_BUILTIN_PTESTNMB128,
29755 IX86_BUILTIN_PTESTNMB256,
29756 IX86_BUILTIN_PTESTNMW128,
29757 IX86_BUILTIN_PTESTNMW256,
29758 IX86_BUILTIN_PTESTNMD128,
29759 IX86_BUILTIN_PTESTNMD256,
29760 IX86_BUILTIN_PTESTNMQ128,
29761 IX86_BUILTIN_PTESTNMQ256,
29762 IX86_BUILTIN_PBROADCASTMB128,
29763 IX86_BUILTIN_PBROADCASTMB256,
29764 IX86_BUILTIN_PBROADCASTMW128,
29765 IX86_BUILTIN_PBROADCASTMW256,
29766 IX86_BUILTIN_COMPRESSPD256,
29767 IX86_BUILTIN_COMPRESSPD128,
29768 IX86_BUILTIN_COMPRESSPS256,
29769 IX86_BUILTIN_COMPRESSPS128,
29770 IX86_BUILTIN_PCOMPRESSQ256,
29771 IX86_BUILTIN_PCOMPRESSQ128,
29772 IX86_BUILTIN_PCOMPRESSD256,
29773 IX86_BUILTIN_PCOMPRESSD128,
29774 IX86_BUILTIN_EXPANDPD256,
29775 IX86_BUILTIN_EXPANDPD128,
29776 IX86_BUILTIN_EXPANDPS256,
29777 IX86_BUILTIN_EXPANDPS128,
29778 IX86_BUILTIN_PEXPANDQ256,
29779 IX86_BUILTIN_PEXPANDQ128,
29780 IX86_BUILTIN_PEXPANDD256,
29781 IX86_BUILTIN_PEXPANDD128,
29782 IX86_BUILTIN_EXPANDPD256Z,
29783 IX86_BUILTIN_EXPANDPD128Z,
29784 IX86_BUILTIN_EXPANDPS256Z,
29785 IX86_BUILTIN_EXPANDPS128Z,
29786 IX86_BUILTIN_PEXPANDQ256Z,
29787 IX86_BUILTIN_PEXPANDQ128Z,
29788 IX86_BUILTIN_PEXPANDD256Z,
29789 IX86_BUILTIN_PEXPANDD128Z,
29790 IX86_BUILTIN_PMAXSD256_MASK,
29791 IX86_BUILTIN_PMINSD256_MASK,
29792 IX86_BUILTIN_PMAXUD256_MASK,
29793 IX86_BUILTIN_PMINUD256_MASK,
29794 IX86_BUILTIN_PMAXSD128_MASK,
29795 IX86_BUILTIN_PMINSD128_MASK,
29796 IX86_BUILTIN_PMAXUD128_MASK,
29797 IX86_BUILTIN_PMINUD128_MASK,
29798 IX86_BUILTIN_PMAXSQ256_MASK,
29799 IX86_BUILTIN_PMINSQ256_MASK,
29800 IX86_BUILTIN_PMAXUQ256_MASK,
29801 IX86_BUILTIN_PMINUQ256_MASK,
29802 IX86_BUILTIN_PMAXSQ128_MASK,
29803 IX86_BUILTIN_PMINSQ128_MASK,
29804 IX86_BUILTIN_PMAXUQ128_MASK,
29805 IX86_BUILTIN_PMINUQ128_MASK,
29806 IX86_BUILTIN_PMINSB256_MASK,
29807 IX86_BUILTIN_PMINUB256_MASK,
29808 IX86_BUILTIN_PMAXSB256_MASK,
29809 IX86_BUILTIN_PMAXUB256_MASK,
29810 IX86_BUILTIN_PMINSB128_MASK,
29811 IX86_BUILTIN_PMINUB128_MASK,
29812 IX86_BUILTIN_PMAXSB128_MASK,
29813 IX86_BUILTIN_PMAXUB128_MASK,
29814 IX86_BUILTIN_PMINSW256_MASK,
29815 IX86_BUILTIN_PMINUW256_MASK,
29816 IX86_BUILTIN_PMAXSW256_MASK,
29817 IX86_BUILTIN_PMAXUW256_MASK,
29818 IX86_BUILTIN_PMINSW128_MASK,
29819 IX86_BUILTIN_PMINUW128_MASK,
29820 IX86_BUILTIN_PMAXSW128_MASK,
29821 IX86_BUILTIN_PMAXUW128_MASK,
29822 IX86_BUILTIN_VPCONFLICTQ256,
29823 IX86_BUILTIN_VPCONFLICTD256,
29824 IX86_BUILTIN_VPCLZCNTQ256,
29825 IX86_BUILTIN_VPCLZCNTD256,
29826 IX86_BUILTIN_UNPCKHPD256_MASK,
29827 IX86_BUILTIN_UNPCKHPD128_MASK,
29828 IX86_BUILTIN_UNPCKHPS256_MASK,
29829 IX86_BUILTIN_UNPCKHPS128_MASK,
29830 IX86_BUILTIN_UNPCKLPD256_MASK,
29831 IX86_BUILTIN_UNPCKLPD128_MASK,
29832 IX86_BUILTIN_UNPCKLPS256_MASK,
29833 IX86_BUILTIN_VPCONFLICTQ128,
29834 IX86_BUILTIN_VPCONFLICTD128,
29835 IX86_BUILTIN_VPCLZCNTQ128,
29836 IX86_BUILTIN_VPCLZCNTD128,
29837 IX86_BUILTIN_UNPCKLPS128_MASK,
29838 IX86_BUILTIN_ALIGND256,
29839 IX86_BUILTIN_ALIGNQ256,
29840 IX86_BUILTIN_ALIGND128,
29841 IX86_BUILTIN_ALIGNQ128,
29842 IX86_BUILTIN_CVTPS2PH256_MASK,
29843 IX86_BUILTIN_CVTPS2PH_MASK,
29844 IX86_BUILTIN_CVTPH2PS_MASK,
29845 IX86_BUILTIN_CVTPH2PS256_MASK,
29846 IX86_BUILTIN_PUNPCKHDQ128_MASK,
29847 IX86_BUILTIN_PUNPCKHDQ256_MASK,
29848 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
29849 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
29850 IX86_BUILTIN_PUNPCKLDQ128_MASK,
29851 IX86_BUILTIN_PUNPCKLDQ256_MASK,
29852 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
29853 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
29854 IX86_BUILTIN_PUNPCKHBW128_MASK,
29855 IX86_BUILTIN_PUNPCKHBW256_MASK,
29856 IX86_BUILTIN_PUNPCKHWD128_MASK,
29857 IX86_BUILTIN_PUNPCKHWD256_MASK,
29858 IX86_BUILTIN_PUNPCKLBW128_MASK,
29859 IX86_BUILTIN_PUNPCKLBW256_MASK,
29860 IX86_BUILTIN_PUNPCKLWD128_MASK,
29861 IX86_BUILTIN_PUNPCKLWD256_MASK,
29862 IX86_BUILTIN_PSLLVV16HI,
29863 IX86_BUILTIN_PSLLVV8HI,
29864 IX86_BUILTIN_PACKSSDW256_MASK,
29865 IX86_BUILTIN_PACKSSDW128_MASK,
29866 IX86_BUILTIN_PACKUSDW256_MASK,
29867 IX86_BUILTIN_PACKUSDW128_MASK,
29868 IX86_BUILTIN_PAVGB256_MASK,
29869 IX86_BUILTIN_PAVGW256_MASK,
29870 IX86_BUILTIN_PAVGB128_MASK,
29871 IX86_BUILTIN_PAVGW128_MASK,
29872 IX86_BUILTIN_VPERMVARSF256_MASK,
29873 IX86_BUILTIN_VPERMVARDF256_MASK,
29874 IX86_BUILTIN_VPERMDF256_MASK,
29875 IX86_BUILTIN_PABSB256_MASK,
29876 IX86_BUILTIN_PABSB128_MASK,
29877 IX86_BUILTIN_PABSW256_MASK,
29878 IX86_BUILTIN_PABSW128_MASK,
29879 IX86_BUILTIN_VPERMILVARPD_MASK,
29880 IX86_BUILTIN_VPERMILVARPS_MASK,
29881 IX86_BUILTIN_VPERMILVARPD256_MASK,
29882 IX86_BUILTIN_VPERMILVARPS256_MASK,
29883 IX86_BUILTIN_VPERMILPD_MASK,
29884 IX86_BUILTIN_VPERMILPS_MASK,
29885 IX86_BUILTIN_VPERMILPD256_MASK,
29886 IX86_BUILTIN_VPERMILPS256_MASK,
29887 IX86_BUILTIN_BLENDMQ256,
29888 IX86_BUILTIN_BLENDMD256,
29889 IX86_BUILTIN_BLENDMPD256,
29890 IX86_BUILTIN_BLENDMPS256,
29891 IX86_BUILTIN_BLENDMQ128,
29892 IX86_BUILTIN_BLENDMD128,
29893 IX86_BUILTIN_BLENDMPD128,
29894 IX86_BUILTIN_BLENDMPS128,
29895 IX86_BUILTIN_BLENDMW256,
29896 IX86_BUILTIN_BLENDMB256,
29897 IX86_BUILTIN_BLENDMW128,
29898 IX86_BUILTIN_BLENDMB128,
29899 IX86_BUILTIN_PMULLD256_MASK,
29900 IX86_BUILTIN_PMULLD128_MASK,
29901 IX86_BUILTIN_PMULUDQ256_MASK,
29902 IX86_BUILTIN_PMULDQ256_MASK,
29903 IX86_BUILTIN_PMULDQ128_MASK,
29904 IX86_BUILTIN_PMULUDQ128_MASK,
29905 IX86_BUILTIN_CVTPD2PS256_MASK,
29906 IX86_BUILTIN_CVTPD2PS_MASK,
29907 IX86_BUILTIN_VPERMVARSI256_MASK,
29908 IX86_BUILTIN_VPERMVARDI256_MASK,
29909 IX86_BUILTIN_VPERMDI256_MASK,
29910 IX86_BUILTIN_CMPQ256,
29911 IX86_BUILTIN_CMPD256,
29912 IX86_BUILTIN_UCMPQ256,
29913 IX86_BUILTIN_UCMPD256,
29914 IX86_BUILTIN_CMPB256,
29915 IX86_BUILTIN_CMPW256,
29916 IX86_BUILTIN_UCMPB256,
29917 IX86_BUILTIN_UCMPW256,
29918 IX86_BUILTIN_CMPPD256_MASK,
29919 IX86_BUILTIN_CMPPS256_MASK,
29920 IX86_BUILTIN_CMPQ128,
29921 IX86_BUILTIN_CMPD128,
29922 IX86_BUILTIN_UCMPQ128,
29923 IX86_BUILTIN_UCMPD128,
29924 IX86_BUILTIN_CMPB128,
29925 IX86_BUILTIN_CMPW128,
29926 IX86_BUILTIN_UCMPB128,
29927 IX86_BUILTIN_UCMPW128,
29928 IX86_BUILTIN_CMPPD128_MASK,
29929 IX86_BUILTIN_CMPPS128_MASK,
29931 IX86_BUILTIN_GATHER3SIV8SF,
29932 IX86_BUILTIN_GATHER3SIV4SF,
29933 IX86_BUILTIN_GATHER3SIV4DF,
29934 IX86_BUILTIN_GATHER3SIV2DF,
29935 IX86_BUILTIN_GATHER3DIV8SF,
29936 IX86_BUILTIN_GATHER3DIV4SF,
29937 IX86_BUILTIN_GATHER3DIV4DF,
29938 IX86_BUILTIN_GATHER3DIV2DF,
29939 IX86_BUILTIN_GATHER3SIV8SI,
29940 IX86_BUILTIN_GATHER3SIV4SI,
29941 IX86_BUILTIN_GATHER3SIV4DI,
29942 IX86_BUILTIN_GATHER3SIV2DI,
29943 IX86_BUILTIN_GATHER3DIV8SI,
29944 IX86_BUILTIN_GATHER3DIV4SI,
29945 IX86_BUILTIN_GATHER3DIV4DI,
29946 IX86_BUILTIN_GATHER3DIV2DI,
29947 IX86_BUILTIN_SCATTERSIV8SF,
29948 IX86_BUILTIN_SCATTERSIV4SF,
29949 IX86_BUILTIN_SCATTERSIV4DF,
29950 IX86_BUILTIN_SCATTERSIV2DF,
29951 IX86_BUILTIN_SCATTERDIV8SF,
29952 IX86_BUILTIN_SCATTERDIV4SF,
29953 IX86_BUILTIN_SCATTERDIV4DF,
29954 IX86_BUILTIN_SCATTERDIV2DF,
29955 IX86_BUILTIN_SCATTERSIV8SI,
29956 IX86_BUILTIN_SCATTERSIV4SI,
29957 IX86_BUILTIN_SCATTERSIV4DI,
29958 IX86_BUILTIN_SCATTERSIV2DI,
29959 IX86_BUILTIN_SCATTERDIV8SI,
29960 IX86_BUILTIN_SCATTERDIV4SI,
29961 IX86_BUILTIN_SCATTERDIV4DI,
29962 IX86_BUILTIN_SCATTERDIV2DI,
29965 IX86_BUILTIN_RANGESD128,
29966 IX86_BUILTIN_RANGESS128,
29967 IX86_BUILTIN_KUNPCKWD,
29968 IX86_BUILTIN_KUNPCKDQ,
29969 IX86_BUILTIN_BROADCASTF32x2_512,
29970 IX86_BUILTIN_BROADCASTI32x2_512,
29971 IX86_BUILTIN_BROADCASTF64X2_512,
29972 IX86_BUILTIN_BROADCASTI64X2_512,
29973 IX86_BUILTIN_BROADCASTF32X8_512,
29974 IX86_BUILTIN_BROADCASTI32X8_512,
29975 IX86_BUILTIN_EXTRACTF64X2_512,
29976 IX86_BUILTIN_EXTRACTF32X8,
29977 IX86_BUILTIN_EXTRACTI64X2_512,
29978 IX86_BUILTIN_EXTRACTI32X8,
29979 IX86_BUILTIN_REDUCEPD512_MASK,
29980 IX86_BUILTIN_REDUCEPS512_MASK,
29981 IX86_BUILTIN_PMULLQ512,
29982 IX86_BUILTIN_XORPD512,
29983 IX86_BUILTIN_XORPS512,
29984 IX86_BUILTIN_ORPD512,
29985 IX86_BUILTIN_ORPS512,
29986 IX86_BUILTIN_ANDPD512,
29987 IX86_BUILTIN_ANDPS512,
29988 IX86_BUILTIN_ANDNPD512,
29989 IX86_BUILTIN_ANDNPS512,
29990 IX86_BUILTIN_INSERTF32X8,
29991 IX86_BUILTIN_INSERTI32X8,
29992 IX86_BUILTIN_INSERTF64X2_512,
29993 IX86_BUILTIN_INSERTI64X2_512,
29994 IX86_BUILTIN_FPCLASSPD512,
29995 IX86_BUILTIN_FPCLASSPS512,
29996 IX86_BUILTIN_CVTD2MASK512,
29997 IX86_BUILTIN_CVTQ2MASK512,
29998 IX86_BUILTIN_CVTMASK2D512,
29999 IX86_BUILTIN_CVTMASK2Q512,
30000 IX86_BUILTIN_CVTPD2QQ512,
30001 IX86_BUILTIN_CVTPS2QQ512,
30002 IX86_BUILTIN_CVTPD2UQQ512,
30003 IX86_BUILTIN_CVTPS2UQQ512,
30004 IX86_BUILTIN_CVTQQ2PS512,
30005 IX86_BUILTIN_CVTUQQ2PS512,
30006 IX86_BUILTIN_CVTQQ2PD512,
30007 IX86_BUILTIN_CVTUQQ2PD512,
30008 IX86_BUILTIN_CVTTPS2QQ512,
30009 IX86_BUILTIN_CVTTPS2UQQ512,
30010 IX86_BUILTIN_CVTTPD2QQ512,
30011 IX86_BUILTIN_CVTTPD2UQQ512,
30012 IX86_BUILTIN_RANGEPS512,
30013 IX86_BUILTIN_RANGEPD512,
30016 IX86_BUILTIN_PACKUSDW512,
30017 IX86_BUILTIN_PACKSSDW512,
30018 IX86_BUILTIN_LOADDQUHI512_MASK,
30019 IX86_BUILTIN_LOADDQUQI512_MASK,
30020 IX86_BUILTIN_PSLLDQ512,
30021 IX86_BUILTIN_PSRLDQ512,
30022 IX86_BUILTIN_STOREDQUHI512_MASK,
30023 IX86_BUILTIN_STOREDQUQI512_MASK,
30024 IX86_BUILTIN_PALIGNR512,
30025 IX86_BUILTIN_PALIGNR512_MASK,
30026 IX86_BUILTIN_MOVDQUHI512_MASK,
30027 IX86_BUILTIN_MOVDQUQI512_MASK,
30028 IX86_BUILTIN_PSADBW512,
30029 IX86_BUILTIN_DBPSADBW512,
30030 IX86_BUILTIN_PBROADCASTB512,
30031 IX86_BUILTIN_PBROADCASTB512_GPR,
30032 IX86_BUILTIN_PBROADCASTW512,
30033 IX86_BUILTIN_PBROADCASTW512_GPR,
30034 IX86_BUILTIN_PMOVSXBW512_MASK,
30035 IX86_BUILTIN_PMOVZXBW512_MASK,
30036 IX86_BUILTIN_VPERMVARHI512_MASK,
30037 IX86_BUILTIN_VPERMT2VARHI512,
30038 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
30039 IX86_BUILTIN_VPERMI2VARHI512,
30040 IX86_BUILTIN_PAVGB512,
30041 IX86_BUILTIN_PAVGW512,
30042 IX86_BUILTIN_PADDB512,
30043 IX86_BUILTIN_PSUBB512,
30044 IX86_BUILTIN_PSUBSB512,
30045 IX86_BUILTIN_PADDSB512,
30046 IX86_BUILTIN_PSUBUSB512,
30047 IX86_BUILTIN_PADDUSB512,
30048 IX86_BUILTIN_PSUBW512,
30049 IX86_BUILTIN_PADDW512,
30050 IX86_BUILTIN_PSUBSW512,
30051 IX86_BUILTIN_PADDSW512,
30052 IX86_BUILTIN_PSUBUSW512,
30053 IX86_BUILTIN_PADDUSW512,
30054 IX86_BUILTIN_PMAXUW512,
30055 IX86_BUILTIN_PMAXSW512,
30056 IX86_BUILTIN_PMINUW512,
30057 IX86_BUILTIN_PMINSW512,
30058 IX86_BUILTIN_PMAXUB512,
30059 IX86_BUILTIN_PMAXSB512,
30060 IX86_BUILTIN_PMINUB512,
30061 IX86_BUILTIN_PMINSB512,
30062 IX86_BUILTIN_PMOVWB512,
30063 IX86_BUILTIN_PMOVSWB512,
30064 IX86_BUILTIN_PMOVUSWB512,
30065 IX86_BUILTIN_PMULHRSW512_MASK,
30066 IX86_BUILTIN_PMULHUW512_MASK,
30067 IX86_BUILTIN_PMULHW512_MASK,
30068 IX86_BUILTIN_PMULLW512_MASK,
30069 IX86_BUILTIN_PSLLWI512_MASK,
30070 IX86_BUILTIN_PSLLW512_MASK,
30071 IX86_BUILTIN_PACKSSWB512,
30072 IX86_BUILTIN_PACKUSWB512,
30073 IX86_BUILTIN_PSRAVV32HI,
30074 IX86_BUILTIN_PMADDUBSW512_MASK,
30075 IX86_BUILTIN_PMADDWD512_MASK,
30076 IX86_BUILTIN_PSRLVV32HI,
30077 IX86_BUILTIN_PUNPCKHBW512,
30078 IX86_BUILTIN_PUNPCKHWD512,
30079 IX86_BUILTIN_PUNPCKLBW512,
30080 IX86_BUILTIN_PUNPCKLWD512,
30081 IX86_BUILTIN_PSHUFB512,
30082 IX86_BUILTIN_PSHUFHW512,
30083 IX86_BUILTIN_PSHUFLW512,
30084 IX86_BUILTIN_PSRAWI512,
30085 IX86_BUILTIN_PSRAW512,
30086 IX86_BUILTIN_PSRLWI512,
30087 IX86_BUILTIN_PSRLW512,
30088 IX86_BUILTIN_CVTB2MASK512,
30089 IX86_BUILTIN_CVTW2MASK512,
30090 IX86_BUILTIN_CVTMASK2B512,
30091 IX86_BUILTIN_CVTMASK2W512,
30092 IX86_BUILTIN_PCMPEQB512_MASK,
30093 IX86_BUILTIN_PCMPEQW512_MASK,
30094 IX86_BUILTIN_PCMPGTB512_MASK,
30095 IX86_BUILTIN_PCMPGTW512_MASK,
30096 IX86_BUILTIN_PTESTMB512,
30097 IX86_BUILTIN_PTESTMW512,
30098 IX86_BUILTIN_PTESTNMB512,
30099 IX86_BUILTIN_PTESTNMW512,
30100 IX86_BUILTIN_PSLLVV32HI,
30101 IX86_BUILTIN_PABSB512,
30102 IX86_BUILTIN_PABSW512,
30103 IX86_BUILTIN_BLENDMW512,
30104 IX86_BUILTIN_BLENDMB512,
30105 IX86_BUILTIN_CMPB512,
30106 IX86_BUILTIN_CMPW512,
30107 IX86_BUILTIN_UCMPB512,
30108 IX86_BUILTIN_UCMPW512,
30110 /* Alternate 4 and 8 element gather/scatter for the vectorizer
30111 where all operands are 32-byte or 64-byte wide respectively. */
30112 IX86_BUILTIN_GATHERALTSIV4DF,
30113 IX86_BUILTIN_GATHERALTDIV8SF,
30114 IX86_BUILTIN_GATHERALTSIV4DI,
30115 IX86_BUILTIN_GATHERALTDIV8SI,
30116 IX86_BUILTIN_GATHER3ALTDIV16SF,
30117 IX86_BUILTIN_GATHER3ALTDIV16SI,
30118 IX86_BUILTIN_GATHER3ALTSIV4DF,
30119 IX86_BUILTIN_GATHER3ALTDIV8SF,
30120 IX86_BUILTIN_GATHER3ALTSIV4DI,
30121 IX86_BUILTIN_GATHER3ALTDIV8SI,
30122 IX86_BUILTIN_GATHER3ALTSIV8DF,
30123 IX86_BUILTIN_GATHER3ALTSIV8DI,
30124 IX86_BUILTIN_GATHER3DIV16SF,
30125 IX86_BUILTIN_GATHER3DIV16SI,
30126 IX86_BUILTIN_GATHER3DIV8DF,
30127 IX86_BUILTIN_GATHER3DIV8DI,
30128 IX86_BUILTIN_GATHER3SIV16SF,
30129 IX86_BUILTIN_GATHER3SIV16SI,
30130 IX86_BUILTIN_GATHER3SIV8DF,
30131 IX86_BUILTIN_GATHER3SIV8DI,
30132 IX86_BUILTIN_SCATTERDIV16SF,
30133 IX86_BUILTIN_SCATTERDIV16SI,
30134 IX86_BUILTIN_SCATTERDIV8DF,
30135 IX86_BUILTIN_SCATTERDIV8DI,
30136 IX86_BUILTIN_SCATTERSIV16SF,
30137 IX86_BUILTIN_SCATTERSIV16SI,
30138 IX86_BUILTIN_SCATTERSIV8DF,
30139 IX86_BUILTIN_SCATTERSIV8DI,
30142 IX86_BUILTIN_GATHERPFQPD,
30143 IX86_BUILTIN_GATHERPFDPS,
30144 IX86_BUILTIN_GATHERPFDPD,
30145 IX86_BUILTIN_GATHERPFQPS,
30146 IX86_BUILTIN_SCATTERPFDPD,
30147 IX86_BUILTIN_SCATTERPFDPS,
30148 IX86_BUILTIN_SCATTERPFQPD,
30149 IX86_BUILTIN_SCATTERPFQPS,
30152 IX86_BUILTIN_EXP2PD_MASK,
30153 IX86_BUILTIN_EXP2PS_MASK,
30154 IX86_BUILTIN_EXP2PS,
30155 IX86_BUILTIN_RCP28PD,
30156 IX86_BUILTIN_RCP28PS,
30157 IX86_BUILTIN_RCP28SD,
30158 IX86_BUILTIN_RCP28SS,
30159 IX86_BUILTIN_RSQRT28PD,
30160 IX86_BUILTIN_RSQRT28PS,
30161 IX86_BUILTIN_RSQRT28SD,
30162 IX86_BUILTIN_RSQRT28SS,
30165 IX86_BUILTIN_VPMADD52LUQ512,
30166 IX86_BUILTIN_VPMADD52HUQ512,
30167 IX86_BUILTIN_VPMADD52LUQ256,
30168 IX86_BUILTIN_VPMADD52HUQ256,
30169 IX86_BUILTIN_VPMADD52LUQ128,
30170 IX86_BUILTIN_VPMADD52HUQ128,
30171 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
30172 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
30173 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
30174 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
30175 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
30176 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
30179 IX86_BUILTIN_VPMULTISHIFTQB512,
30180 IX86_BUILTIN_VPMULTISHIFTQB256,
30181 IX86_BUILTIN_VPMULTISHIFTQB128,
30182 IX86_BUILTIN_VPERMVARQI512_MASK,
30183 IX86_BUILTIN_VPERMT2VARQI512,
30184 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
30185 IX86_BUILTIN_VPERMI2VARQI512,
30186 IX86_BUILTIN_VPERMVARQI256_MASK,
30187 IX86_BUILTIN_VPERMVARQI128_MASK,
30188 IX86_BUILTIN_VPERMT2VARQI256,
30189 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
30190 IX86_BUILTIN_VPERMT2VARQI128,
30191 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
30192 IX86_BUILTIN_VPERMI2VARQI256,
30193 IX86_BUILTIN_VPERMI2VARQI128,
30195 /* SHA builtins. */
30196 IX86_BUILTIN_SHA1MSG1,
30197 IX86_BUILTIN_SHA1MSG2,
30198 IX86_BUILTIN_SHA1NEXTE,
30199 IX86_BUILTIN_SHA1RNDS4,
30200 IX86_BUILTIN_SHA256MSG1,
30201 IX86_BUILTIN_SHA256MSG2,
30202 IX86_BUILTIN_SHA256RNDS2,
30204 /* CLWB instructions. */
30207 /* PCOMMIT instructions. */
30208 IX86_BUILTIN_PCOMMIT,
30210 /* CLFLUSHOPT instructions. */
30211 IX86_BUILTIN_CLFLUSHOPT,
30213 /* TFmode support builtins. */
30215 IX86_BUILTIN_HUGE_VALQ,
30216 IX86_BUILTIN_FABSQ,
30217 IX86_BUILTIN_COPYSIGNQ,
30219 /* Vectorizer support builtins. */
30220 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30221 IX86_BUILTIN_CPYSGNPS,
30222 IX86_BUILTIN_CPYSGNPD,
30223 IX86_BUILTIN_CPYSGNPS256,
30224 IX86_BUILTIN_CPYSGNPS512,
30225 IX86_BUILTIN_CPYSGNPD256,
30226 IX86_BUILTIN_CPYSGNPD512,
30227 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30228 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30231 /* FMA4 instructions. */
30232 IX86_BUILTIN_VFMADDSS,
30233 IX86_BUILTIN_VFMADDSD,
30234 IX86_BUILTIN_VFMADDPS,
30235 IX86_BUILTIN_VFMADDPD,
30236 IX86_BUILTIN_VFMADDPS256,
30237 IX86_BUILTIN_VFMADDPD256,
30238 IX86_BUILTIN_VFMADDSUBPS,
30239 IX86_BUILTIN_VFMADDSUBPD,
30240 IX86_BUILTIN_VFMADDSUBPS256,
30241 IX86_BUILTIN_VFMADDSUBPD256,
30243 /* FMA3 instructions. */
30244 IX86_BUILTIN_VFMADDSS3,
30245 IX86_BUILTIN_VFMADDSD3,
30247 /* XOP instructions. */
30248 IX86_BUILTIN_VPCMOV,
30249 IX86_BUILTIN_VPCMOV_V2DI,
30250 IX86_BUILTIN_VPCMOV_V4SI,
30251 IX86_BUILTIN_VPCMOV_V8HI,
30252 IX86_BUILTIN_VPCMOV_V16QI,
30253 IX86_BUILTIN_VPCMOV_V4SF,
30254 IX86_BUILTIN_VPCMOV_V2DF,
30255 IX86_BUILTIN_VPCMOV256,
30256 IX86_BUILTIN_VPCMOV_V4DI256,
30257 IX86_BUILTIN_VPCMOV_V8SI256,
30258 IX86_BUILTIN_VPCMOV_V16HI256,
30259 IX86_BUILTIN_VPCMOV_V32QI256,
30260 IX86_BUILTIN_VPCMOV_V8SF256,
30261 IX86_BUILTIN_VPCMOV_V4DF256,
30263 IX86_BUILTIN_VPPERM,
30265 IX86_BUILTIN_VPMACSSWW,
30266 IX86_BUILTIN_VPMACSWW,
30267 IX86_BUILTIN_VPMACSSWD,
30268 IX86_BUILTIN_VPMACSWD,
30269 IX86_BUILTIN_VPMACSSDD,
30270 IX86_BUILTIN_VPMACSDD,
30271 IX86_BUILTIN_VPMACSSDQL,
30272 IX86_BUILTIN_VPMACSSDQH,
30273 IX86_BUILTIN_VPMACSDQL,
30274 IX86_BUILTIN_VPMACSDQH,
30275 IX86_BUILTIN_VPMADCSSWD,
30276 IX86_BUILTIN_VPMADCSWD,
30278 IX86_BUILTIN_VPHADDBW,
30279 IX86_BUILTIN_VPHADDBD,
30280 IX86_BUILTIN_VPHADDBQ,
30281 IX86_BUILTIN_VPHADDWD,
30282 IX86_BUILTIN_VPHADDWQ,
30283 IX86_BUILTIN_VPHADDDQ,
30284 IX86_BUILTIN_VPHADDUBW,
30285 IX86_BUILTIN_VPHADDUBD,
30286 IX86_BUILTIN_VPHADDUBQ,
30287 IX86_BUILTIN_VPHADDUWD,
30288 IX86_BUILTIN_VPHADDUWQ,
30289 IX86_BUILTIN_VPHADDUDQ,
30290 IX86_BUILTIN_VPHSUBBW,
30291 IX86_BUILTIN_VPHSUBWD,
30292 IX86_BUILTIN_VPHSUBDQ,
30294 IX86_BUILTIN_VPROTB,
30295 IX86_BUILTIN_VPROTW,
30296 IX86_BUILTIN_VPROTD,
30297 IX86_BUILTIN_VPROTQ,
30298 IX86_BUILTIN_VPROTB_IMM,
30299 IX86_BUILTIN_VPROTW_IMM,
30300 IX86_BUILTIN_VPROTD_IMM,
30301 IX86_BUILTIN_VPROTQ_IMM,
30303 IX86_BUILTIN_VPSHLB,
30304 IX86_BUILTIN_VPSHLW,
30305 IX86_BUILTIN_VPSHLD,
30306 IX86_BUILTIN_VPSHLQ,
30307 IX86_BUILTIN_VPSHAB,
30308 IX86_BUILTIN_VPSHAW,
30309 IX86_BUILTIN_VPSHAD,
30310 IX86_BUILTIN_VPSHAQ,
30312 IX86_BUILTIN_VFRCZSS,
30313 IX86_BUILTIN_VFRCZSD,
30314 IX86_BUILTIN_VFRCZPS,
30315 IX86_BUILTIN_VFRCZPD,
30316 IX86_BUILTIN_VFRCZPS256,
30317 IX86_BUILTIN_VFRCZPD256,
30319 IX86_BUILTIN_VPCOMEQUB,
30320 IX86_BUILTIN_VPCOMNEUB,
30321 IX86_BUILTIN_VPCOMLTUB,
30322 IX86_BUILTIN_VPCOMLEUB,
30323 IX86_BUILTIN_VPCOMGTUB,
30324 IX86_BUILTIN_VPCOMGEUB,
30325 IX86_BUILTIN_VPCOMFALSEUB,
30326 IX86_BUILTIN_VPCOMTRUEUB,
30328 IX86_BUILTIN_VPCOMEQUW,
30329 IX86_BUILTIN_VPCOMNEUW,
30330 IX86_BUILTIN_VPCOMLTUW,
30331 IX86_BUILTIN_VPCOMLEUW,
30332 IX86_BUILTIN_VPCOMGTUW,
30333 IX86_BUILTIN_VPCOMGEUW,
30334 IX86_BUILTIN_VPCOMFALSEUW,
30335 IX86_BUILTIN_VPCOMTRUEUW,
30337 IX86_BUILTIN_VPCOMEQUD,
30338 IX86_BUILTIN_VPCOMNEUD,
30339 IX86_BUILTIN_VPCOMLTUD,
30340 IX86_BUILTIN_VPCOMLEUD,
30341 IX86_BUILTIN_VPCOMGTUD,
30342 IX86_BUILTIN_VPCOMGEUD,
30343 IX86_BUILTIN_VPCOMFALSEUD,
30344 IX86_BUILTIN_VPCOMTRUEUD,
30346 IX86_BUILTIN_VPCOMEQUQ,
30347 IX86_BUILTIN_VPCOMNEUQ,
30348 IX86_BUILTIN_VPCOMLTUQ,
30349 IX86_BUILTIN_VPCOMLEUQ,
30350 IX86_BUILTIN_VPCOMGTUQ,
30351 IX86_BUILTIN_VPCOMGEUQ,
30352 IX86_BUILTIN_VPCOMFALSEUQ,
30353 IX86_BUILTIN_VPCOMTRUEUQ,
30355 IX86_BUILTIN_VPCOMEQB,
30356 IX86_BUILTIN_VPCOMNEB,
30357 IX86_BUILTIN_VPCOMLTB,
30358 IX86_BUILTIN_VPCOMLEB,
30359 IX86_BUILTIN_VPCOMGTB,
30360 IX86_BUILTIN_VPCOMGEB,
30361 IX86_BUILTIN_VPCOMFALSEB,
30362 IX86_BUILTIN_VPCOMTRUEB,
30364 IX86_BUILTIN_VPCOMEQW,
30365 IX86_BUILTIN_VPCOMNEW,
30366 IX86_BUILTIN_VPCOMLTW,
30367 IX86_BUILTIN_VPCOMLEW,
30368 IX86_BUILTIN_VPCOMGTW,
30369 IX86_BUILTIN_VPCOMGEW,
30370 IX86_BUILTIN_VPCOMFALSEW,
30371 IX86_BUILTIN_VPCOMTRUEW,
30373 IX86_BUILTIN_VPCOMEQD,
30374 IX86_BUILTIN_VPCOMNED,
30375 IX86_BUILTIN_VPCOMLTD,
30376 IX86_BUILTIN_VPCOMLED,
30377 IX86_BUILTIN_VPCOMGTD,
30378 IX86_BUILTIN_VPCOMGED,
30379 IX86_BUILTIN_VPCOMFALSED,
30380 IX86_BUILTIN_VPCOMTRUED,
30382 IX86_BUILTIN_VPCOMEQQ,
30383 IX86_BUILTIN_VPCOMNEQ,
30384 IX86_BUILTIN_VPCOMLTQ,
30385 IX86_BUILTIN_VPCOMLEQ,
30386 IX86_BUILTIN_VPCOMGTQ,
30387 IX86_BUILTIN_VPCOMGEQ,
30388 IX86_BUILTIN_VPCOMFALSEQ,
30389 IX86_BUILTIN_VPCOMTRUEQ,
30391 /* LWP instructions. */
30392 IX86_BUILTIN_LLWPCB,
30393 IX86_BUILTIN_SLWPCB,
30394 IX86_BUILTIN_LWPVAL32,
30395 IX86_BUILTIN_LWPVAL64,
30396 IX86_BUILTIN_LWPINS32,
30397 IX86_BUILTIN_LWPINS64,
30402 IX86_BUILTIN_XBEGIN,
30404 IX86_BUILTIN_XABORT,
30405 IX86_BUILTIN_XTEST,
30408 IX86_BUILTIN_BNDMK,
30409 IX86_BUILTIN_BNDSTX,
30410 IX86_BUILTIN_BNDLDX,
30411 IX86_BUILTIN_BNDCL,
30412 IX86_BUILTIN_BNDCU,
30413 IX86_BUILTIN_BNDRET,
30414 IX86_BUILTIN_BNDNARROW,
30415 IX86_BUILTIN_BNDINT,
30416 IX86_BUILTIN_SIZEOF,
30417 IX86_BUILTIN_BNDLOWER,
30418 IX86_BUILTIN_BNDUPPER,
30420 /* BMI instructions. */
30421 IX86_BUILTIN_BEXTR32,
30422 IX86_BUILTIN_BEXTR64,
30425 /* TBM instructions. */
30426 IX86_BUILTIN_BEXTRI32,
30427 IX86_BUILTIN_BEXTRI64,
30429 /* BMI2 instructions. */
30430 IX86_BUILTIN_BZHI32,
30431 IX86_BUILTIN_BZHI64,
30432 IX86_BUILTIN_PDEP32,
30433 IX86_BUILTIN_PDEP64,
30434 IX86_BUILTIN_PEXT32,
30435 IX86_BUILTIN_PEXT64,
30437 /* ADX instructions. */
30438 IX86_BUILTIN_ADDCARRYX32,
30439 IX86_BUILTIN_ADDCARRYX64,
30441 /* SBB instructions. */
30442 IX86_BUILTIN_SBB32,
30443 IX86_BUILTIN_SBB64,
30445 /* FSGSBASE instructions. */
30446 IX86_BUILTIN_RDFSBASE32,
30447 IX86_BUILTIN_RDFSBASE64,
30448 IX86_BUILTIN_RDGSBASE32,
30449 IX86_BUILTIN_RDGSBASE64,
30450 IX86_BUILTIN_WRFSBASE32,
30451 IX86_BUILTIN_WRFSBASE64,
30452 IX86_BUILTIN_WRGSBASE32,
30453 IX86_BUILTIN_WRGSBASE64,
30455 /* RDRND instructions. */
30456 IX86_BUILTIN_RDRAND16_STEP,
30457 IX86_BUILTIN_RDRAND32_STEP,
30458 IX86_BUILTIN_RDRAND64_STEP,
30460 /* RDSEED instructions. */
30461 IX86_BUILTIN_RDSEED16_STEP,
30462 IX86_BUILTIN_RDSEED32_STEP,
30463 IX86_BUILTIN_RDSEED64_STEP,
30465 /* F16C instructions. */
30466 IX86_BUILTIN_CVTPH2PS,
30467 IX86_BUILTIN_CVTPH2PS256,
30468 IX86_BUILTIN_CVTPS2PH,
30469 IX86_BUILTIN_CVTPS2PH256,
30471 /* CFString built-in for darwin */
30472 IX86_BUILTIN_CFSTRING,
30474 /* Builtins to get CPU type and supported features. */
30475 IX86_BUILTIN_CPU_INIT,
30476 IX86_BUILTIN_CPU_IS,
30477 IX86_BUILTIN_CPU_SUPPORTS,
30479 /* Read/write FLAGS register built-ins. */
30480 IX86_BUILTIN_READ_FLAGS,
30481 IX86_BUILTIN_WRITE_FLAGS,
30486 /* Table for the ix86 builtin decls.  */
/* Indexed by enum ix86_builtins; entries stay NULL_TREE until the builtin
   is actually declared (see def_builtin and ix86_add_new_builtins).  */
30487 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30489 /* Table of all of the builtin functions that are possible with different ISA's
30490    but are waiting to be built until a function is declared to use that
30492 struct builtin_isa {
30493   const char *name;		/* function name */
30494   enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30495   HOST_WIDE_INT isa;		/* isa_flags this builtin is defined for */
30496   bool const_p;			/* true if the declaration is constant */
30497   bool leaf_p;			/* true if the declaration has leaf attribute */
30498   bool nothrow_p;		/* true if the declaration has nothrow attribute */
  /* True while the builtin has been recorded by def_builtin but its decl
     has not yet been built (cleared by ix86_add_new_builtins).  */
30499   bool set_and_not_built_p;
30502 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30505 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
30506    of which isa_flags to use in the ix86_builtins_isa array.  Stores the
30507    function decl in the ix86_builtins array.  Returns the function decl or
30508    NULL_TREE, if the builtin was not added.
30510    If the front end has a special hook for builtin functions, delay adding
30511    builtin functions that aren't in the current ISA until the ISA is changed
30512    with function specific optimization.  Doing so, can save about 300K for the
30513    default compiler.  When the builtin is expanded, check at that time whether
30516    If the front end doesn't have a special hook, record all builtins, even if
30517    it isn't an instruction set in the current ISA in case the user uses
30518    function specific options for a different ISA, so that we don't get scope
30519    errors if a builtin is added in the middle of a function scope.  */
30522 def_builtin (HOST_WIDE_INT mask, const char *name,
30523 	     enum ix86_builtin_func_type tcode,
30524 	     enum ix86_builtins code)
30526   tree decl = NULL_TREE;
  /* 64-bit-only builtins are skipped entirely on 32-bit targets.  */
30528   if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
  /* Record the full mask (including the 64-bit bit) for later ISA checks,
     then drop the 64-bit bit before comparing against ix86_isa_flags.  */
30530       ix86_builtins_isa[(int) code].isa = mask;
30532       mask &= ~OPTION_MASK_ISA_64BIT;
30534 	  || (mask & ix86_isa_flags) != 0
30535 	  || (lang_hooks.builtin_function
30536 	      == lang_hooks.builtin_function_ext_scope))
  /* The builtin's ISA is enabled (or the front end declares builtins at
     extern scope): build and register the decl now.  */
30539 	  tree type = ix86_get_builtin_func_type (tcode);
30540 	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30542 	  ix86_builtins[(int) code] = decl;
30543 	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
  /* Otherwise defer: record everything ix86_add_new_builtins needs to
     declare the function later, once its ISA becomes enabled.  */
30547 	  ix86_builtins[(int) code] = NULL_TREE;
30548 	  ix86_builtins_isa[(int) code].tcode = tcode;
30549 	  ix86_builtins_isa[(int) code].name = name;
30550 	  ix86_builtins_isa[(int) code].leaf_p = false;
30551 	  ix86_builtins_isa[(int) code].nothrow_p = false;
30552 	  ix86_builtins_isa[(int) code].const_p = false;
30553 	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30560 /* Like def_builtin, but also marks the function decl "const".  */
30563 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30564 		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30566   tree decl = def_builtin (mask, name, tcode, code);
  /* If the decl was built immediately, mark it readonly ("const");
     if it was deferred, record const_p so the attribute is applied
     when ix86_add_new_builtins eventually builds it.  */
30568     TREE_READONLY (decl) = 1;
30570     ix86_builtins_isa[(int) code].const_p = true;
30575 /* Add any new builtin functions for a given ISA that may not have been
30576    declared.  This saves a bit of space compared to adding all of the
30577    declarations to the tree, even if we didn't use them.  */
30580 ix86_add_new_builtins (HOST_WIDE_INT isa)
30584   for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
  /* Only touch builtins that were deferred by def_builtin and whose
     ISA requirement is satisfied by ISA.  */
30586       if ((ix86_builtins_isa[i].isa & isa) != 0
30587 	  && ix86_builtins_isa[i].set_and_not_built_p)
30591 	  /* Don't define the builtin again.  */
30592 	  ix86_builtins_isa[i].set_and_not_built_p = false;
30594 	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30595 	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30596 						 type, i, BUILT_IN_MD, NULL,
  /* Re-apply the attributes recorded when the builtin was deferred.  */
30599 	  ix86_builtins[i] = decl;
30600 	  if (ix86_builtins_isa[i].const_p)
30601 	    TREE_READONLY (decl) = 1;
30602 	  if (ix86_builtins_isa[i].leaf_p)
30603 	    DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30605 	  if (ix86_builtins_isa[i].nothrow_p)
30606 	    TREE_NOTHROW (decl) = 1;
30611 /* Bits for builtin_description.flag.  */
30613 /* Set when we don't support the comparison natively, and should
30614    swap_comparison in order to support it.  */
30615 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One entry per builtin in the bdesc_* tables below.  */
30617 struct builtin_description
30619   const HOST_WIDE_INT mask;	/* OPTION_MASK_ISA_* flags required */
30620   const enum insn_code icode;	/* insn pattern used to expand it */
30621   const char *const name;	/* user-visible __builtin_ia32_* name */
30622   const enum ix86_builtins code; /* enum ix86_builtins identifier */
30623   const enum rtx_code comparison; /* rtx comparison code, or UNKNOWN */
/* SSE/SSE2 (u)comiss / (u)comisd scalar comparisons that read EFLAGS;
   the comparison field is the rtx code used when expanding the builtin.  */
30627 static const struct builtin_description bdesc_comi[] =
30629   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30630   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30631   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30632   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30633   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30634   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30635   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30636   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30637   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30638   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30639   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30640   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30641   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30642   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30643   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30644   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30645   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30646   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30647   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30648   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30649   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30650   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30651   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30652   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* SSE4.2 explicit-length packed string compare (pcmpestr*).  A nonzero
   last field selects the EFLAGS condition mode (CCAmode etc.) the
   builtin reads; 0 means the index/mask result is returned.  */
30655 static const struct builtin_description bdesc_pcmpestr[] =
30658   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30659   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30660   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30661   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30662   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30663   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30664   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
/* SSE4.2 implicit-length packed string compare (pcmpistr*); same layout
   and flag conventions as bdesc_pcmpestr above.  */
30667 static const struct builtin_description bdesc_pcmpistr[] =
30670   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30671   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30672   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30673   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30674   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30675   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30676   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
30679 /* Special builtins with variable number of arguments. */
30680 static const struct builtin_description bdesc_special_args[] =
30682 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30683 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30684 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
30686 /* 80387 (for use internally for atomic compound assignment). */
30687 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
30688 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
30689 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
30690 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
30693 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30696 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30698 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
30699 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
30700 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
30701 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30702 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30703 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30704 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30705 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30706 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30708 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30709 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30710 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30711 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30712 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30713 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30714 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30715 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30718 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30719 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30720 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30722 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30723 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30724 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30725 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30727 /* SSE or 3DNow!A */
30728 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30729 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
30732 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30733 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30734 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30735 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
30736 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30737 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
30738 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
30739 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
30740 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
30741 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30743 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30744 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30747 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30750 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
30753 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30754 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30757 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
30758 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
30760 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30761 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30762 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30763 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
30764 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
30766 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30767 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30768 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30769 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30770 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30771 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
30772 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30774 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
30775 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30776 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30778 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
30779 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
30780 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
30781 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
30782 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
30783 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
30784 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
30785 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
30788 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
30789 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
30790 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
30791 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
30792 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
30793 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
30794 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
30795 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
30796 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
30799 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30800 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30801 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30802 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30803 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30804 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30805 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30806 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30807 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30808 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30809 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30810 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30811 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30812 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30813 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30814 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30818 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30819 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
30820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
30821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
30822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
30823 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30831 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30833 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30835 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30836 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30837 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30841 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30842 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30843 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30844 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30847 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
30848 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
30849 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
30850 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
30851 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
30852 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
30855 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30856 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30857 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30858 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30859 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30860 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30861 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30862 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30865 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30866 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
30867 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
30870 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
30871 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
30872 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
30873 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
30876 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
30877 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
30878 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
30879 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
30880 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30881 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30882 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30883 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30884 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30885 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30886 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30887 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30888 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30889 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30890 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30891 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30892 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30893 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30894 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30895 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30896 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30897 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30898 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30899 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30900 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30901 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30902 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30903 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30904 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30905 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30906 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30907 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30908 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30909 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30910 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30911 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30912 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
30913 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
30914 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
30915 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
30916 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30917 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30918 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30919 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30920 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30921 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30922 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30923 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30924 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30925 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30926 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30927 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30928 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30929 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30930 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30931 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30932 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30933 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30934 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30935 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30936 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30937 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30938 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30939 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30940 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30941 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30942 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30943 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30944 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30945 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30946 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30947 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30948 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30949 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30950 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30951 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30952 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30953 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30954 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30955 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30956 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30957 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30958 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30959 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30960 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30961 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30962 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30963 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30964 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30965 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30966 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30967 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30968 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30969 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30972 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
30975 /* Builtins with variable number of arguments. */
30976 static const struct builtin_description bdesc_args[] =
30978 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
30979 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
30980 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
30981 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
30982 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
30983 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
30984 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
30987 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30988 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30989 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30990 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30991 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30992 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30994 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30995 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30996 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30997 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30998 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30999 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31000 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31001 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31003 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31004 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31006 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31007 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31008 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31009 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31011 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31012 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31013 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31014 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31015 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31016 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31018 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31019 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31020 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31021 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31022 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
31023 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
31025 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31026 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
31027 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31029 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
31031 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31032 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31033 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31034 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31035 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31036 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31038 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31039 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31040 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31041 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31042 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31043 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31045 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31046 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31047 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31048 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31051 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31052 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31053 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31054 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31056 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31057 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31058 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31059 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31060 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31061 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31062 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31063 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31064 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31065 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31066 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31067 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31068 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31069 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31070 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31073 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31074 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31075 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31076 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31077 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31078 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31081 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
31082 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31083 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31084 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31085 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31086 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31087 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31088 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31089 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31090 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31091 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31092 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31094 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31096 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31097 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31098 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31099 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31100 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31101 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31102 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31103 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31105 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31106 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31107 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31108 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31109 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31110 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31111 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31112 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31113 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31114 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31115 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
31116 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31117 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31118 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31119 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31120 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31121 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31122 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31123 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31124 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31126 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31127 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31128 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31129 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31131 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31132 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31133 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31134 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31136 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31138 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31139 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31140 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31141 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31142 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31144 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
31145 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
31146 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
31148 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
31150 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31151 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31152 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31154 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
31155 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
31157 /* SSE MMX or 3DNow!A */
31158 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31159 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31160 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31162 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31163 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31164 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31165 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31167 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
31168 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
31170 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
31173 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31175 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
31176 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
31177 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31178 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
31179 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
31181 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31182 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31183 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
31184 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31185 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31187 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
31189 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31190 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31191 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31192 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31194 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31195 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31196 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31198 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31199 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31200 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31201 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31202 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31203 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31204 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31205 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31207 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31208 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31209 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31210 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31211 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
31212 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31213 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31214 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31215 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31216 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31217 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31218 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31219 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31220 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31221 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31222 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31223 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31224 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31225 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31226 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31228 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31229 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31230 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31231 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31233 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31234 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31235 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31236 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31238 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31240 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31241 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31242 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31244 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31246 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31247 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31248 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31249 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31250 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31251 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31252 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31253 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31255 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31256 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31257 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31258 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31259 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31260 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31261 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31262 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31264 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31265 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
31267 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31268 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31269 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31270 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31272 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31273 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31275 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31276 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31277 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31278 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31279 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31280 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31282 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31283 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31284 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31285 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31287 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31288 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31289 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31290 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31291 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31292 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31293 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31294 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31296 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31297 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31298 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31300 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31301 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
31303 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
31304 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31306 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
31308 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
31309 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
31310 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
31311 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
31313 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31314 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31315 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31316 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31317 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31318 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31319 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31322 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31323 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31324 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31325 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31326 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31327 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31329 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31330 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31331 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31332 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31334 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
31335 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31336 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31338 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
31340 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31343 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31344 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31347 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
31348 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31350 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31351 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31352 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31353 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31354 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31355 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31358 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31359 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
31360 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31361 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
31362 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31363 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31365 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31366 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31367 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31368 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31369 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31370 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31371 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31372 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31373 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31374 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31375 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31376 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31377 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
31378 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
31379 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31380 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31381 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31382 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31383 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31384 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31385 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31386 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31387 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31388 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31391 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
31392 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
31395 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31396 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31397 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
31398 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
31399 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31400 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31401 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31402 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
31403 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
31404 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
31406 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31407 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31408 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31409 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31410 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31411 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31412 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31413 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31414 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31415 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31416 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31417 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31418 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31420 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31421 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31422 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31423 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31424 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31425 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31426 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31427 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31428 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31429 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31430 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31431 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31434 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31435 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31436 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31437 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31439 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
31440 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
31441 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
31442 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
31444 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31445 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31447 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31448 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31450 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
31451 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
31452 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
31453 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
31455 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
31456 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
31458 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31459 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31461 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31462 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31463 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31466 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31467 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
31468 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
31469 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31470 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31473 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
31474 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
31475 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
31476 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31479 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
31480 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31482 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31483 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31484 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31485 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
/* PCLMUL */
31488 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
/* AVX */
31491 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31492 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31493 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31494 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31495 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31496 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31497 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31498 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31499 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31500 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31501 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31502 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31503 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31504 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31505 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31506 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31507 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31508 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31509 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31510 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31511 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31512 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31513 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31514 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31515 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31516 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31518 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
31519 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
31520 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
31521 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31523 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31524 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31525 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31526 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31527 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31528 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31529 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31530 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31531 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31532 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31533 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31534 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31535 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31536 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31537 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31538 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31539 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31540 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31541 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31542 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31543 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31544 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31545 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31546 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31547 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31548 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31549 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31550 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31551 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31552 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31553 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31554 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31555 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31556 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31558 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31559 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31560 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31562 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31563 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31564 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31565 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31566 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31568 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31570 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31571 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31573 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31574 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31575 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31576 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31578 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31579 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31581 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31582 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31584 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31585 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31586 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31587 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31589 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31590 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31592 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31593 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31595 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31596 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31597 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31598 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31600 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31601 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31602 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31603 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31604 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31605 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
31607 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31608 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31609 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31610 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31611 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31612 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31613 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31614 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31615 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31616 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31617 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31618 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31619 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31620 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31621 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31623 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31624 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31626 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31627 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
/* Fix: the builtin name string previously contained a stray trailing
   space ("__builtin_ia32_vec_pack_sfix256 "), which registered the
   builtin under a name no user can spell in source code.  */
31629 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
/* AVX2 */
31632 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31633 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31634 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31635 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31636 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31637 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31638 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31639 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31640 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31641 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31642 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31643 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31644 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31645 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31646 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31647 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31648 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31649 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31650 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31651 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31652 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31653 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31654 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31655 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31656 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31657 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31658 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31659 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31660 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31661 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31662 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31663 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31664 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31665 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31666 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31667 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31668 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31669 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31670 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31671 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31672 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31673 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31674 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31675 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31676 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31677 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31678 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31679 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31680 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31681 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31682 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31683 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31684 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31685 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31686 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31687 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31688 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31689 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31690 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31691 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31692 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31693 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31694 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31695 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31696 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31697 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31698 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31699 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31700 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31701 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31702 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31703 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31704 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31705 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31706 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
31707 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31708 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31709 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31710 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31711 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31712 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31713 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31714 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31715 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31716 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31717 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31718 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31719 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31720 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31721 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31722 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31723 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31724 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31725 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31726 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31727 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31728 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31729 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31730 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31731 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31732 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31733 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31734 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31735 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31736 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31737 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31738 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31739 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31740 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31741 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31742 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31743 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31744 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31745 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31746 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31747 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31748 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31749 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31750 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31751 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31752 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31753 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
31754 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
31755 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31756 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31757 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31758 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31759 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31760 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31761 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31762 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31763 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31764 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
31765 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
31766 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
31767 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
31768 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31769 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31770 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31771 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31772 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31773 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31774 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31775 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31776 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31777 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31779 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31782 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31783 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31784 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31787 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31788 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31791 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
31792 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
31793 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
31794 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
31797 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31798 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31799 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31800 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31801 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31802 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31805 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
31806 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
31807 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
31808 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
31809 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
31810 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
31811 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31812 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31813 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31814 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31818 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
31819 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
31821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
31822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31823 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
31829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
31831 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31833 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31835 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
31836 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
31837 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
31838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
31839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
31840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
31841 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
31842 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
31843 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31844 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31846 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31847 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
31860 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
31861 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
31862 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
31863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
31864 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31866 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31867 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31868 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31869 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31870 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31871 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31872 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31873 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31874 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31875 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31876 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31877 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31878 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31879 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31880 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31881 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31882 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31883 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31884 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31885 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31886 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31887 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31888 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31889 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31890 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31891 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31892 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31893 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31894 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31895 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31896 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31897 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31898 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31899 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31900 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31901 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31902 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31903 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31904 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31905 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31906 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31907 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31908 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31909 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31910 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31911 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31912 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31913 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31914 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31915 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
/* AVX512F / AVX512CD / AVX512ER builtin descriptors (bdesc_args fragment).
   Each entry is { ISA option mask, insn pattern code, builtin name,
   builtin enum, comparison/rounding rtx code (UNKNOWN if unused),
   (int) function prototype }.  Entries whose name ends in "_mask" take a
   write-mask operand (trailing _HI/_QI in the prototype).  */
31918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31971 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31972 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31973 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31974 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
31976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
31982 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
31983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31984 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31985 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31986 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
31988 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
31989 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
31990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
31991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31992 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31993 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31994 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31995 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31996 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31997 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
31998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
31999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
/* Unmasked 512-bit arithmetic / rounding helpers.  The two vec_pack_sfix
   entries with (enum rtx_code) ROUND_FLOOR / ROUND_CEIL reuse the same
   insn pattern, differing only in the rounding-mode code.  */
32002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
32003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
32004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
32005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32006 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
32008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32011 /* Mask arithmetic operations: AVX512F 16-bit mask (k-register)
   logic builtins.  All operate on HImode masks (HI_FTYPE_...).  */
32012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
32015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
32019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
/* SHA-NI builtins.  The name field is 0: these entries carry no
   builtin-by-name registration here.  NOTE(review): gated on
   OPTION_MASK_ISA_SSE2 rather than a SHA-specific mask — presumably
   the SHA ISA check happens at definition time elsewhere; confirm
   against the def_builtin calls for these IX86_BUILTIN_SHA* codes.  */
32024 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32025 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32026 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32027 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
32028 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32029 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32030 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
32033 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
32034 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
32035 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32036 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32037 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32038 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32039 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32040 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32041 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32042 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32043 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32044 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32045 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32046 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32047 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32048 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32049 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32050 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32051 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32052 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32053 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32054 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32055 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32056 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32057 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32058 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32059 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32060 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32061 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32062 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32063 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32064 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32065 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32066 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32067 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32068 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32069 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32070 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32071 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32072 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32073 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32074 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32075 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32076 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32077 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32078 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32079 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32080 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32081 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32082 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32083 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32084 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32085 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32086 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
32087 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
32088 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
32089 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
32090 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32091 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32092 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32093 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32094 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32095 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32096 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32097 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32098 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32099 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32100 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32101 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32102 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32103 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32104 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32105 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32106 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32107 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32108 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32109 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32110 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32111 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32112 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32113 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32114 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32115 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32116 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32117 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32118 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32119 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32120 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32121 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32122 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
32123 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
32124 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
32125 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
32126 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32127 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
32128 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
32129 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
32130 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32131 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
32132 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32133 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
32134 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32135 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
32136 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32137 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32138 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32139 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
32140 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32141 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32142 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32143 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
32144 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
32145 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
32146 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
32147 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32148 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32149 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32150 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32151 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32152 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32153 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32154 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32155 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32156 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32157 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32158 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32159 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32160 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32161 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32162 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32163 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32164 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32165 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32166 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32167 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32168 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32169 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32170 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32171 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32172 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32173 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32174 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32175 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32176 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32177 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32178 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32179 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32180 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32181 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32182 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32183 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32184 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32185 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32186 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32187 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32188 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32189 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32190 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32191 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32192 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32193 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32194 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32195 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32196 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32197 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32198 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32199 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32200 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32201 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32202 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32203 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32204 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32205 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32206 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32207 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32208 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32209 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32210 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32211 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32212 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32213 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32214 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32215 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32216 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32217 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32218 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32219 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32220 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32221 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32222 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32223 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32224 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32225 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32226 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32227 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32228 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32229 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32230 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32231 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32232 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32233 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32234 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32235 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32236 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32237 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32238 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32239 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32240 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32241 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32242 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32243 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32244 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32245 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32246 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32247 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32248 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32249 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32250 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32251 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32252 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32253 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32254 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32255 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32256 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32257 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32258 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32259 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32260 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32261 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32262 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32263 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32264 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32265 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32266 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32267 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32268 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32269 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32270 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32271 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32272 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32273 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32274 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32275 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32276 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32277 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32278 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32279 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32280 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32281 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32282 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32283 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32284 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32285 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32286 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32287 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32288 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32289 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask , "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32290 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32291 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32292 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask" , IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32293 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask" , IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32294 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32295 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask" , IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32296 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32297 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32298 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32299 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32300 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32301 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32302 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32303 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32304 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32305 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32306 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32307 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32308 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32309 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32310 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32311 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32312 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32313 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK , UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32314 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32315 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32316 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32317 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32318 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32319 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32320 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32321 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32322 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32323 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32324 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32325 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32326 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32327 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32328 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32329 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32330 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32331 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32332 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32333 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32334 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32335 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32336 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32337 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32338 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32339 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32340 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32341 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32342 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32343 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32344 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32345 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32346 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32347 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32348 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32349 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32350 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32351 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32352 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32353 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32354 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32355 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32356 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32357 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32358 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32359 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32360 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32361 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32362 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32363 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32364 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32365 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32366 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32367 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32368 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32369 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32370 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32371 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32372 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32373 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32374 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32375 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32376 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32377 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32378 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32379 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32380 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32381 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32382 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32383 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32384 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32385 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32386 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32387 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32388 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32389 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32390 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32391 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32392 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32393 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32394 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32395 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32396 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32397 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32398 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32399 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32400 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32401 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32402 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32403 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32404 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32405 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32406 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32407 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32408 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32409 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32410 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32412 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32413 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32415 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
32416 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
32417 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32418 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32419 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
32420 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
32421 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
32422 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
32423 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32424 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32425 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32426 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32427 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32428 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32429 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32430 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32431 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32432 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32433 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32434 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32435 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32436 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32437 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32438 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32439 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32440 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32441 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32442 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32443 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32444 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32445 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32446 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32447 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32448 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32449 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32450 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32451 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32452 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32453 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32454 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32455 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32456 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32457 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32458 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32459 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32460 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32461 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32462 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32463 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32464 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32465 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32466 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32467 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32468 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32469 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32470 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32471 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32472 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32473 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32474 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32475 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32476 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32477 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32478 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32479 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32480 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32481 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32482 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32483 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32484 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32485 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32486 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32487 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32488 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32489 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32490 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32491 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32492 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32493 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32494 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32495 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32496 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32497 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32498 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32499 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32500 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32501 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32502 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32503 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32504 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32505 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32506 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32507 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32508 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32509 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32510 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32511 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32512 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32513 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32514 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32515 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32516 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32517 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32518 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32519 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32520 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32521 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32522 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32523 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
32524 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
32525 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32526 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
32527 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
32528 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
32529 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
32530 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
32531 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
32532 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
32533 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
32534 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
32535 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
32536 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
32537 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
32538 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
32539 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
32540 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
32541 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
32542 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
32543 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
32544 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
32545 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32546 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32547 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32548 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32549 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32550 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32551 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32552 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32553 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32554 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32555 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32556 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32557 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32558 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32559 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32560 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32561 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32562 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32563 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32564 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32565 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32566 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32567 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32568 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32569 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32570 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32571 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32572 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32573 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32574 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32575 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32576 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32577 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
32578 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
32579 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
32580 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
32581 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32582 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32583 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32584 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32585 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32586 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32587 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32588 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32589 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32590 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32591 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32592 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32593 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32594 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32595 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32596 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32597 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32598 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32599 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32600 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32601 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32602 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32603 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32604 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32605 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32606 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32607 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32608 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32609 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32610 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32611 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32612 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32613 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32614 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32615 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32616 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32617 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32618 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32619 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32620 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32621 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32622 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32623 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32624 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32625 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32626 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32627 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32628 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32629 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32630 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32631 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32632 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32633 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32634 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32635 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32636 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32637 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32638 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32639 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32640 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32641 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32642 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32643 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32644 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32645 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32646 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32647 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32648 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32649 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32650 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32651 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32652 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32653 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32654 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32655 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
32656 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
32657 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
32658 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
32659 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
32660 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
32661 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32662 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32663 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32664 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32665 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32666 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32667 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32668 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32669 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32670 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32671 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32672 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32673 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32674 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32675 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32676 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32677 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32678 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32679 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32680 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32681 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32682 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32683 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32684 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32685 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32686 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32687 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32688 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32689 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32690 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32691 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32692 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32693 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32694 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32695 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32696 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32697 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32698 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32699 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32700 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32701 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32702 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32703 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32704 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32705 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32706 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32707 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32708 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32709 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32710 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32711 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32712 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32713 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32714 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32715 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32716 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32717 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32718 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32719 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32720 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
32721 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
32722 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32723 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32724 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32725 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32726 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32727 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32728 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32729 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32730 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32731 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32732 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32733 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
32734 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
32735 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32736 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32737 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32738 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32739 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32740 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32741 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32742 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32743 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
32744 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
32747 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32748 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32749 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
32750 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
32751 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
32752 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
32753 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
32754 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
32755 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
32756 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
32757 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32758 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32759 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32760 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32761 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32762 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32763 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32764 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32765 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32766 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI},
32767 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32768 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
32769 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
32770 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
32771 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
32772 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
32773 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
32774 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
32775 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
32776 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
32777 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
32780 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
32781 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
32782 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32783 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32784 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32785 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32786 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
32787 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
32788 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32789 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32790 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
32791 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
32792 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
32793 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
32794 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
32795 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
32796 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32797 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32798 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32799 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32800 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32801 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32802 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32803 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32804 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32805 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32806 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32807 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
/* AVX512BW builtins (tail of the args table whose header is above this
   chunk).  Each entry appears to be: ISA option mask, insn code
   (CODE_FOR_*), builtin name, builtin enum, rtx comparison code
   (UNKNOWN for non-comparison builtins), prototype enum — confirm
   against struct builtin_description.  */
32808 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32809 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32810 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32811 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32812 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32813 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32814 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32815 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32816 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32817 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32818 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32819 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32820 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32821 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32822 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32823 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32824 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32825 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32826 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32827 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32828 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask" , IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32829 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask" , IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32830 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32831 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32832 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32833 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32834 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32835 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32836 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
32837 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
32838 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32839 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32840 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32841 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32842 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32843 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32844 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32845 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32846 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32847 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32848 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32849 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
/* Vector <-> mask-register conversions and mask-producing compares:
   note the return "type" here is a plain DI/SI mask, not a vector.  */
32850 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
32851 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
32852 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
32853 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
32854 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32855 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32856 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32857 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32858 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32859 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32860 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32861 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32862 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32863 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32864 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32865 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32866 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32867 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32868 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32869 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32870 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
/* AVX512IFMA builtins: 52-bit integer fused multiply-add
   (VPMADD52LUQ / VPMADD52HUQ) in 512/256/128-bit forms.
   NOTE(review): "vpamdd52" in the CODE_FOR_ names looks like a
   transposed "vpmadd52", but it must match the insn pattern names in
   sse.md exactly -- confirm there before renaming.  */
32873 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32874 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32875 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32876 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
/* 256/128-bit variants additionally require AVX512VL.  */
32877 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32878 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32879 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32880 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32881 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32882 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32883 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32884 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
/* AVX512VBMI builtins: byte-granular multishift and two-source byte
   permutes (VPERMB / VPERMT2B / VPERMI2B).  128/256-bit forms also
   require AVX512VL.  */
32887 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32888 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32889 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32890 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32891 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32892 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32893 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32894 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32895 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32896 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
/* AVX512VBMI+VL two-source byte permutes, 256/128-bit forms.
   Fix: the two _maskz entries originally OR'ed OPTION_MASK_ISA_AVX512VBMI
   into the ISA mask twice; OR is idempotent, so dropping the duplicate is
   behavior-preserving and matches the sibling _mask entries above.  */
32897 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32898 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32899 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32900 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32901 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32904 /* Builtins with rounding support. */
32905 static const struct builtin_description bdesc_round_args[] =
/* Every builtin in this table takes a trailing INT operand (see the
   _INT suffix on each prototype enum) selecting the embedded
   rounding mode / suppress-all-exceptions behavior.  */
32908 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32909 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32910 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32911 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32912 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
32913 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
32914 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
32915 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
32916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
32917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
32918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
32921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
32923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
32925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
/* 64-bit scalar-integer conversions additionally require TARGET_64BIT.  */
32927 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
32928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
32929 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
32930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
32931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32936 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
32937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
32938 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
32939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
32950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
32951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
32956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
32957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
/* AVX512F max/min/mul/rndscale/scalef/sqrt/sub, scalar cvt-to-int, and
   FMA forms with embedded rounding (table continues past this chunk).  */
32959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
32972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
32973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32982 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32984 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32985 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32986 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
32988 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
32989 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
32990 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
32991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
32992 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
32993 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
32994 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
32995 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
32996 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
32997 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
32998 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
32999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33000 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33002 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
/* FMA with embedded rounding: _mask/_mask3/_maskz variants differ in
   which operand supplies the passthrough for masked-off elements.  */
33003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
33010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
33011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33029 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33030 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33031 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33032 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33033 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33034 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33035 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33036 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33037 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33038 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33041 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33042 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33043 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33044 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33045 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33046 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33047 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33048 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33049 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33050 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33051 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33052 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33053 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33054 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33055 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
33056 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
33059 /* Builtins for MPX. */
/* MPX (Memory Protection Extensions) builtins with side effects.
   Table layout: { ISA option mask, insn code, builtin name, builtin
   enum value, comparison code (UNKNOWN -- unused for these), function
   prototype code }.  The insn code is 0 for every entry, i.e. there is
   no named insn pattern behind these builtins; NOTE(review): they
   appear to be expanded specially by the builtin expander rather than
   through a pattern -- confirm against the MPX expansion code.  */
33060 static const struct builtin_description bdesc_mpx[] =
33062 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
33063 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33064 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33067 /* Const builtins for MPX. */
/* MPX builtins without side effects (registered as const functions).
   Table layout: { ISA option mask, insn code, builtin name, builtin
   enum value, comparison code (UNKNOWN -- unused for these), function
   prototype code }.  As in the side-effecting MPX table, the insn code
   is 0 for every entry: no named insn pattern backs these builtins.  */
33068 static const struct builtin_description bdesc_mpx_const[] =
33070 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
33071 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
33072 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
33073 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
33074 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
33075 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
33076 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
33077 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
33080 /* FMA4 and XOP. */
/* Shorthand aliases for the function-prototype codes used by the
   bdesc_multi_arg builtin table.  Naming scheme, as shown by the
   right-hand sides below: MULTI_ARG_<nargs>_<elem>[suffix], where a
   trailing "2" on the element part selects the 256-bit vector form
   (e.g. V8SF instead of V4SF), _IMM means the second operand is a
   scalar int (SI), _CMP and _TF append a comparison/condition-code
   operand, and two-element names such as SI_DI describe a conversion
   from the narrower input element to the wider result element.  */
/* Four-operand forms (two sources, a selector vector, an immediate).  */
33081 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
33082 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
33083 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
33084 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
/* Three-operand forms.  */
33085 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
33086 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
33087 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
33088 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
33089 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
33090 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
33091 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
33092 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
33093 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
33094 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
33095 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
33096 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
33097 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
33098 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
/* Two-operand forms, including immediate (_IMM), comparison (_CMP)
   and condition-code (_TF) variants.  */
33099 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
33100 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
33101 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
33102 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
33103 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
33104 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
33105 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
33106 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
33107 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
33108 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
33109 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
33110 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
33111 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
33112 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
33113 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
33114 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
33115 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
33116 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
33117 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
33118 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
/* One-operand forms, including widening conversions (e.g. QI_HI maps
   V16QI input to V8HI result).  */
33119 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
33120 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
33121 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
33122 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
33123 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
33124 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
33125 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
33126 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
33127 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
33128 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
33129 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
33130 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
33131 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
33132 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
33134 static const struct builtin_description bdesc_multi_arg[] =
33136 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
33137 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
33138 UNKNOWN, (int)MULTI_ARG_3_SF },
33139 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
33140 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
33141 UNKNOWN, (int)MULTI_ARG_3_DF },
33143 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
33144 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
33145 UNKNOWN, (int)MULTI_ARG_3_SF },
33146 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
33147 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
33148 UNKNOWN, (int)MULTI_ARG_3_DF },
33150 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
33151 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
33152 UNKNOWN, (int)MULTI_ARG_3_SF },
33153 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
33154 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
33155 UNKNOWN, (int)MULTI_ARG_3_DF },
33156 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
33157 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
33158 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33159 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
33160 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
33161 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33163 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
33164 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
33165 UNKNOWN, (int)MULTI_ARG_3_SF },
33166 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
33167 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
33168 UNKNOWN, (int)MULTI_ARG_3_DF },
33169 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
33170 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
33171 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33172 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
33173 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
33174 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33176 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
33177 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
33178 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
33179 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
33180 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
33181 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
33182 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
33184 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33185 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33186 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
33187 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
33188 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
33189 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
33190 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
33192 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
33194 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33195 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33196 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33197 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33198 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33199 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33200 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33201 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33202 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33203 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33204 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33205 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33207 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33208 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
33209 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
33210 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
33211 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
33212 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
33213 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
33214 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
33215 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33216 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
33217 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
33218 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
33219 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33220 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
33221 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
33222 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
33224 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
33225 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
33226 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
33227 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
33228 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
33229 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
33231 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33232 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33233 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33234 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33235 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33236 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33237 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33238 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33239 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33240 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33241 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33242 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33243 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33244 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33245 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33247 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
33248 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33249 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33250 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
33251 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
33252 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
33253 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
33255 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
33256 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33257 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33258 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
33259 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
33260 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
33261 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
33263 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
33264 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33265 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33266 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
33267 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
33268 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
33269 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
33271 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33272 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33273 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33274 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
33275 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
33276 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
33277 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
33279 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
33280 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33281 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33282 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
33283 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
33284 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
33285 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
33287 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
33288 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33289 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33290 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
33291 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
33292 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
33293 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
33295 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
33296 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33297 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33298 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
33299 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
33300 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
33301 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
33303 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33304 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33305 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33306 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
33307 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
33308 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
33309 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
33311 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33312 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33313 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33314 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33315 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33316 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33317 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33318 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33320 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33321 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33322 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33323 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33324 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33325 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33326 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33327 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33329 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33330 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33331 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33332 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
33336 /* TM vector builtins. */
33338 /* Reuse the existing x86-specific `struct builtin_description' because
33339 we're lazy.  Add casts to make the TM builtin codes fit. */
33340 static const struct builtin_description bdesc_tm[] =
33342 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33343 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33344 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33345 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33346 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33347 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33348 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33350 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33351 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33352 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33353 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33354 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33355 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33356 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33358 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33359 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33360 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33361 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33362 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33363 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33364 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33366 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33367 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33368 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
33371 /* TM callbacks. */
33373 /* Return the builtin decl needed to load a vector of TYPE. */
33376 ix86_builtin_tm_load (tree type)
33378 if (TREE_CODE (type) == VECTOR_TYPE)
33380 switch (tree_to_uhwi (TYPE_SIZE (type)))
33383 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
33385 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
33387 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33393 /* Return the builtin decl needed to store a vector of TYPE. */
33396 ix86_builtin_tm_store (tree type)
33398 if (TREE_CODE (type) == VECTOR_TYPE)
33400 switch (tree_to_uhwi (TYPE_SIZE (type)))
33403 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
33405 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
33407 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33413 /* Initialize the transactional memory vector load/store builtins. */
33416 ix86_init_tm_builtins (void)
33418 enum ix86_builtin_func_type ftype;
33419 const struct builtin_description *d;
33422 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33423 tree attrs_log, attrs_type_log;
33428 /* If there are no builtins defined, we must be compiling in a
33429 language without trans-mem support. */
33430 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
33433 /* Use whatever attributes a normal TM load has. */
33434 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33435 attrs_load = DECL_ATTRIBUTES (decl);
33436 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33437 /* Use whatever attributes a normal TM store has. */
33438 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33439 attrs_store = DECL_ATTRIBUTES (decl);
33440 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33441 /* Use whatever attributes a normal TM log has. */
33442 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33443 attrs_log = DECL_ATTRIBUTES (decl);
33444 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33446 for (i = 0, d = bdesc_tm;
33447 i < ARRAY_SIZE (bdesc_tm);
33450 if ((d->mask & ix86_isa_flags) != 0
33451 || (lang_hooks.builtin_function
33452 == lang_hooks.builtin_function_ext_scope))
33454 tree type, attrs, attrs_type;
33455 enum built_in_function code = (enum built_in_function) d->code;
33457 ftype = (enum ix86_builtin_func_type) d->flag;
33458 type = ix86_get_builtin_func_type (ftype);
33460 if (BUILTIN_TM_LOAD_P (code))
33462 attrs = attrs_load;
33463 attrs_type = attrs_type_load;
33465 else if (BUILTIN_TM_STORE_P (code))
33467 attrs = attrs_store;
33468 attrs_type = attrs_type_store;
33473 attrs_type = attrs_type_log;
33475 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33476 /* The builtin without the prefix for
33477 calling it directly. */
33478 d->name + strlen ("__builtin_"),
33480 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
33481 set the TYPE_ATTRIBUTES. */
33482 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
33484 set_builtin_decl (code, decl, false);
33489 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
33490 in the current target ISA to allow the user to compile particular modules
33491 with different target specific options that differ from the command line
33494 ix86_init_mmx_sse_builtins (void)
33496 const struct builtin_description * d;
33497 enum ix86_builtin_func_type ftype;
33500 /* Add all special builtins with variable number of operands. */
33501 for (i = 0, d = bdesc_special_args;
33502 i < ARRAY_SIZE (bdesc_special_args);
33508 ftype = (enum ix86_builtin_func_type) d->flag;
33509 def_builtin (d->mask, d->name, ftype, d->code);
33512 /* Add all builtins with variable number of operands. */
33513 for (i = 0, d = bdesc_args;
33514 i < ARRAY_SIZE (bdesc_args);
33520 ftype = (enum ix86_builtin_func_type) d->flag;
33521 def_builtin_const (d->mask, d->name, ftype, d->code);
33524 /* Add all builtins with rounding. */
33525 for (i = 0, d = bdesc_round_args;
33526 i < ARRAY_SIZE (bdesc_round_args);
33532 ftype = (enum ix86_builtin_func_type) d->flag;
33533 def_builtin_const (d->mask, d->name, ftype, d->code);
33536 /* pcmpestr[im] insns. */
33537 for (i = 0, d = bdesc_pcmpestr;
33538 i < ARRAY_SIZE (bdesc_pcmpestr);
33541 if (d->code == IX86_BUILTIN_PCMPESTRM128)
33542 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33544 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33545 def_builtin_const (d->mask, d->name, ftype, d->code);
33548 /* pcmpistr[im] insns. */
33549 for (i = 0, d = bdesc_pcmpistr;
33550 i < ARRAY_SIZE (bdesc_pcmpistr);
33553 if (d->code == IX86_BUILTIN_PCMPISTRM128)
33554 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33556 ftype = INT_FTYPE_V16QI_V16QI_INT;
33557 def_builtin_const (d->mask, d->name, ftype, d->code);
33560 /* comi/ucomi insns. */
33561 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33563 if (d->mask == OPTION_MASK_ISA_SSE2)
33564 ftype = INT_FTYPE_V2DF_V2DF;
33566 ftype = INT_FTYPE_V4SF_V4SF;
33567 def_builtin_const (d->mask, d->name, ftype, d->code);
33571 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33572 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33573 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33574 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
33576 /* SSE or 3DNow!A */
33577 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33578 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33579 IX86_BUILTIN_MASKMOVQ);
33582 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33583 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33585 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33586 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33587 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33588 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33591 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33592 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33593 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33594 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
33597 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33598 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33599 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33600 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33601 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33602 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33603 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33604 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33605 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33606 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33607 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33608 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33611 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33612 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33615 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33616 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33617 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33618 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33619 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33620 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33621 IX86_BUILTIN_RDRAND64_STEP);
33624 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33625 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33626 IX86_BUILTIN_GATHERSIV2DF);
33628 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33629 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33630 IX86_BUILTIN_GATHERSIV4DF);
33632 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33633 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33634 IX86_BUILTIN_GATHERDIV2DF);
33636 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33637 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33638 IX86_BUILTIN_GATHERDIV4DF);
33640 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33641 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33642 IX86_BUILTIN_GATHERSIV4SF);
33644 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33645 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33646 IX86_BUILTIN_GATHERSIV8SF);
33648 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33649 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33650 IX86_BUILTIN_GATHERDIV4SF);
33652 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33653 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33654 IX86_BUILTIN_GATHERDIV8SF);
33656 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33657 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33658 IX86_BUILTIN_GATHERSIV2DI);
33660 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33661 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33662 IX86_BUILTIN_GATHERSIV4DI);
33664 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33665 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33666 IX86_BUILTIN_GATHERDIV2DI);
33668 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33669 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33670 IX86_BUILTIN_GATHERDIV4DI);
33672 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33673 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33674 IX86_BUILTIN_GATHERSIV4SI);
33676 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33677 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33678 IX86_BUILTIN_GATHERSIV8SI);
33680 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33681 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33682 IX86_BUILTIN_GATHERDIV4SI);
33684 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
33685 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
33686 IX86_BUILTIN_GATHERDIV8SI);
33688 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
33689 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
33690 IX86_BUILTIN_GATHERALTSIV4DF);
33692 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
33693 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
33694 IX86_BUILTIN_GATHERALTDIV8SF);
33696 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
33697 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
33698 IX86_BUILTIN_GATHERALTSIV4DI);
33700 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
33701 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
33702 IX86_BUILTIN_GATHERALTDIV8SI);
33705 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
33706 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
33707 IX86_BUILTIN_GATHER3SIV16SF);
33709 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
33710 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
33711 IX86_BUILTIN_GATHER3SIV8DF);
33713 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
33714 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
33715 IX86_BUILTIN_GATHER3DIV16SF);
33717 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
33718 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
33719 IX86_BUILTIN_GATHER3DIV8DF);
33721 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
33722 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
33723 IX86_BUILTIN_GATHER3SIV16SI);
33725 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
33726 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
33727 IX86_BUILTIN_GATHER3SIV8DI);
33729 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
33730 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
33731 IX86_BUILTIN_GATHER3DIV16SI);
33733 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
33734 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
33735 IX86_BUILTIN_GATHER3DIV8DI);
33737 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
33738 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
33739 IX86_BUILTIN_GATHER3ALTSIV8DF);
33741 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
33742 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
33743 IX86_BUILTIN_GATHER3ALTDIV16SF);
33745 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
33746 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
33747 IX86_BUILTIN_GATHER3ALTSIV8DI);
33749 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
33750 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
33751 IX86_BUILTIN_GATHER3ALTDIV16SI);
33753 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
33754 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
33755 IX86_BUILTIN_SCATTERSIV16SF);
33757 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
33758 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
33759 IX86_BUILTIN_SCATTERSIV8DF);
33761 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
33762 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
33763 IX86_BUILTIN_SCATTERDIV16SF);
33765 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
33766 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
33767 IX86_BUILTIN_SCATTERDIV8DF);
33769 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
33770 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
33771 IX86_BUILTIN_SCATTERSIV16SI);
33773 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
33774 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
33775 IX86_BUILTIN_SCATTERSIV8DI);
33777 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
33778 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
33779 IX86_BUILTIN_SCATTERDIV16SI);
33781 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
33782 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
33783 IX86_BUILTIN_SCATTERDIV8DI);
33786 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
33787 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
33788 IX86_BUILTIN_GATHER3SIV2DF);
33790 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
33791 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
33792 IX86_BUILTIN_GATHER3SIV4DF);
33794 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
33795 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
33796 IX86_BUILTIN_GATHER3DIV2DF);
33798 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
33799 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
33800 IX86_BUILTIN_GATHER3DIV4DF);
33802 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
33803 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
33804 IX86_BUILTIN_GATHER3SIV4SF);
33806 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
33807 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
33808 IX86_BUILTIN_GATHER3SIV8SF);
33810 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
33811 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
33812 IX86_BUILTIN_GATHER3DIV4SF);
33814 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
33815 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
33816 IX86_BUILTIN_GATHER3DIV8SF);
33818 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
33819 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
33820 IX86_BUILTIN_GATHER3SIV2DI);
33822 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
33823 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
33824 IX86_BUILTIN_GATHER3SIV4DI);
33826 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
33827 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
33828 IX86_BUILTIN_GATHER3DIV2DI);
33830 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
33831 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
33832 IX86_BUILTIN_GATHER3DIV4DI);
33834 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
33835 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
33836 IX86_BUILTIN_GATHER3SIV4SI);
33838 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
33839 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
33840 IX86_BUILTIN_GATHER3SIV8SI);
33842 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
33843 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
33844 IX86_BUILTIN_GATHER3DIV4SI);
33846 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
33847 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
33848 IX86_BUILTIN_GATHER3DIV8SI);
33850 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
33851 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
33852 IX86_BUILTIN_GATHER3ALTSIV4DF);
33854 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
33855 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
33856 IX86_BUILTIN_GATHER3ALTDIV8SF);
33858 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
33859 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
33860 IX86_BUILTIN_GATHER3ALTSIV4DI);
33862 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
33863 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
33864 IX86_BUILTIN_GATHER3ALTDIV8SI);
33866 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
33867 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
33868 IX86_BUILTIN_SCATTERSIV8SF);
33870 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
33871 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
33872 IX86_BUILTIN_SCATTERSIV4SF);
33874 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
33875 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
33876 IX86_BUILTIN_SCATTERSIV4DF);
33878 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
33879 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
33880 IX86_BUILTIN_SCATTERSIV2DF);
33882 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
33883 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
33884 IX86_BUILTIN_SCATTERDIV8SF);
33886 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
33887 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
33888 IX86_BUILTIN_SCATTERDIV4SF);
33890 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
33891 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
33892 IX86_BUILTIN_SCATTERDIV4DF);
33894 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
33895 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
33896 IX86_BUILTIN_SCATTERDIV2DF);
33898 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
33899 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
33900 IX86_BUILTIN_SCATTERSIV8SI);
33902 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
33903 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
33904 IX86_BUILTIN_SCATTERSIV4SI);
33906 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
33907 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
33908 IX86_BUILTIN_SCATTERSIV4DI);
33910 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
33911 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
33912 IX86_BUILTIN_SCATTERSIV2DI);
33914 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
33915 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
33916 IX86_BUILTIN_SCATTERDIV8SI);
33918 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
33919 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
33920 IX86_BUILTIN_SCATTERDIV4SI);
33922 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
33923 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
33924 IX86_BUILTIN_SCATTERDIV4DI);
33926 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
33927 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
33928 IX86_BUILTIN_SCATTERDIV2DI);
33931 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
33932 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33933 IX86_BUILTIN_GATHERPFDPD);
33934 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
33935 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33936 IX86_BUILTIN_GATHERPFDPS);
33937 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
33938 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33939 IX86_BUILTIN_GATHERPFQPD);
33940 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
33941 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33942 IX86_BUILTIN_GATHERPFQPS);
33943 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
33944 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33945 IX86_BUILTIN_SCATTERPFDPD);
33946 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
33947 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33948 IX86_BUILTIN_SCATTERPFDPS);
33949 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
33950 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33951 IX86_BUILTIN_SCATTERPFQPD);
33952 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
33953 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33954 IX86_BUILTIN_SCATTERPFQPS);
33957 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
33958 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
33959 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
33960 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
33961 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
33962 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
33963 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
33964 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
33965 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
33966 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
33967 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
33968 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
33969 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
33970 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
33973 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
33974 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
33976 /* MMX access to the vec_init patterns. */
33977 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
33978 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
33980 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
33981 V4HI_FTYPE_HI_HI_HI_HI,
33982 IX86_BUILTIN_VEC_INIT_V4HI);
33984 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
33985 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
33986 IX86_BUILTIN_VEC_INIT_V8QI);
33988 /* Access to the vec_extract patterns. */
33989 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
33990 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
33991 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
33992 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
33993 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
33994 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
33995 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
33996 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
33997 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
33998 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
34000 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34001 "__builtin_ia32_vec_ext_v4hi",
34002 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
34004 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
34005 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
34007 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
34008 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
34010 /* Access to the vec_set patterns. */
34011 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
34012 "__builtin_ia32_vec_set_v2di",
34013 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
34015 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
34016 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
34018 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
34019 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
34021 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
34022 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
34024 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34025 "__builtin_ia32_vec_set_v4hi",
34026 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
34028 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
34029 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
34032 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
34033 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
34034 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
34035 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
34036 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
34037 "__builtin_ia32_rdseed_di_step",
34038 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
34041 def_builtin (0, "__builtin_ia32_addcarryx_u32",
34042 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
34043 def_builtin (OPTION_MASK_ISA_64BIT,
34044 "__builtin_ia32_addcarryx_u64",
34045 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34046 IX86_BUILTIN_ADDCARRYX64);
34049 def_builtin (0, "__builtin_ia32_sbb_u32",
34050 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
34051 def_builtin (OPTION_MASK_ISA_64BIT,
34052 "__builtin_ia32_sbb_u64",
34053 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34054 IX86_BUILTIN_SBB64);
34056 /* Read/write FLAGS. */
34057 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
34058 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34059 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
34060 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34061 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
34062 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
34063 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
34064 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
34067 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
34068 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
34071 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
34072 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
34074 /* Add FMA4 multi-arg argument instructions */
34075 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
34080 ftype = (enum ix86_builtin_func_type) d->flag;
34081 def_builtin_const (d->mask, d->name, ftype, d->code);
/* Register the MPX (Memory Protection Extensions) builtins from the
   bdesc_mpx (side-effecting) and bdesc_mpx_const (const) tables,
   marking each as leaf and nothrow.
   NOTE(review): this extraction has elided lines (return type, braces,
   loop steps, and the branch guarding a NULL decl are missing); the
   code below is not compilable as shown.  */
34086 ix86_init_mpx_builtins ()
34088 const struct builtin_description * d;
34089 enum ix86_builtin_func_type ftype;
/* First table: MPX builtins registered with def_builtin.  */
34093 for (i = 0, d = bdesc_mpx;
34094 i < ARRAY_SIZE (bdesc_mpx);
34100 ftype = (enum ix86_builtin_func_type) d->flag;
34101 decl = def_builtin (d->mask, d->name, ftype, d->code);
34103 /* With no leaf and nothrow flags for MPX builtins
34104 abnormal edges may follow its call when setjmp
34105 presents in the function. Since we may have a lot
34106 of MPX builtins calls it causes lots of useless
34107 edges and enormous PHI nodes. To avoid this we mark
34108 MPX builtins as leaf and nothrow. */
34111 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34113 TREE_NOTHROW (decl) = 1;
/* Also record leaf/nothrow in ix86_builtins_isa — presumably so a
   builtin whose decl is deferred (ISA not enabled yet) gets the same
   flags when it is created later.  TODO confirm against the elided
   branch.  */
34117 ix86_builtins_isa[(int)d->code].leaf_p = true;
34118 ix86_builtins_isa[(int)d->code].nothrow_p = true;
/* Second table: const MPX builtins; identical leaf/nothrow marking.  */
34122 for (i = 0, d = bdesc_mpx_const;
34123 i < ARRAY_SIZE (bdesc_mpx_const);
34129 ftype = (enum ix86_builtin_func_type) d->flag;
34130 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
34134 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34136 TREE_NOTHROW (decl) = 1;
34140 ix86_builtins_isa[(int)d->code].leaf_p = true;
34141 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34146 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
34147 to return a pointer to VERSION_DECL if the outcome of the expression
34148 formed by PREDICATE_CHAIN is true. This function will be called during
34149 version dispatch to decide which function version to execute. It returns
34150 the basic block at the end, to which more conditions can be added.
   NOTE(review): interior lines of this function are elided in this
   extraction (braces, edge creation for e12/e23, bb assignments, the
   final pop_cfun/return); read alongside the full source.  */
34153 add_condition_to_bb (tree function_decl, tree version_decl,
34154 tree predicate_chain, basic_block new_bb)
34156 gimple return_stmt;
34157 tree convert_expr, result_var;
34158 gimple convert_stmt;
34159 gimple call_cond_stmt;
34160 gimple if_else_stmt;
34162 basic_block bb1, bb2, bb3;
34165 tree cond_var, and_expr_var = NULL_TREE;
34168 tree predicate_decl, predicate_arg;
34170 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
34172 gcc_assert (new_bb != NULL);
34173 gseq = bb_seq (new_bb);
/* Build "return (void *) &version_decl;" — the value the resolver
   hands back when this version is selected.  */
34176 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
34177 build_fold_addr_expr (version_decl));
34178 result_var = create_tmp_var (ptr_type_node);
34179 convert_stmt = gimple_build_assign (result_var, convert_expr);
34180 return_stmt = gimple_build_return (result_var);
/* No predicates: this is the unconditional (default) return — emit it
   straight into NEW_BB with no branching.  */
34182 if (predicate_chain == NULL_TREE)
34184 gimple_seq_add_stmt (&gseq, convert_stmt);
34185 gimple_seq_add_stmt (&gseq, return_stmt);
34186 set_bb_seq (new_bb, gseq);
34187 gimple_set_bb (convert_stmt, new_bb);
34188 gimple_set_bb (return_stmt, new_bb);
/* One call per predicate (PURPOSE = predicate builtin decl,
   VALUE = its argument); results are ANDed together below.  */
34193 while (predicate_chain != NULL)
34195 cond_var = create_tmp_var (integer_type_node);
34196 predicate_decl = TREE_PURPOSE (predicate_chain);
34197 predicate_arg = TREE_VALUE (predicate_chain);
34198 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
34199 gimple_call_set_lhs (call_cond_stmt, cond_var);
34201 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
34202 gimple_set_bb (call_cond_stmt, new_bb);
34203 gimple_seq_add_stmt (&gseq, call_cond_stmt);
34205 predicate_chain = TREE_CHAIN (predicate_chain);
34207 if (and_expr_var == NULL)
34208 and_expr_var = cond_var;
34211 gimple assign_stmt;
34212 /* Use MIN_EXPR to check if any integer is zero?.
34213 and_expr_var = min_expr <cond_var, and_expr_var> */
34214 assign_stmt = gimple_build_assign (and_expr_var,
34215 build2 (MIN_EXPR, integer_type_node,
34216 cond_var, and_expr_var));
34218 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
34219 gimple_set_bb (assign_stmt, new_bb);
34220 gimple_seq_add_stmt (&gseq, assign_stmt);
/* if (and_expr_var > 0) — all predicates nonzero — take the return.  */
34224 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
34226 NULL_TREE, NULL_TREE);
34227 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34228 gimple_set_bb (if_else_stmt, new_bb);
34229 gimple_seq_add_stmt (&gseq, if_else_stmt);
34231 gimple_seq_add_stmt (&gseq, convert_stmt);
34232 gimple_seq_add_stmt (&gseq, return_stmt);
34233 set_bb_seq (new_bb, gseq);
/* Split after the condition: bb1 ends in the cond, bb2 holds the
   return; the split edge becomes the TRUE edge into bb2.  */
34236 e12 = split_block (bb1, if_else_stmt);
34238 e12->flags &= ~EDGE_FALLTHRU;
34239 e12->flags |= EDGE_TRUE_VALUE;
34241 e23 = split_block (bb2, return_stmt);
34243 gimple_set_bb (convert_stmt, bb2);
34244 gimple_set_bb (return_stmt, bb2);
/* bb3 is the fall-through for a failed predicate; the caller chains
   the next version's condition onto it.  */
34247 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
34250 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34257 /* This parses the attribute arguments to target in DECL and determines
34258 the right builtin to use to match the platform specification.
34259 It returns the priority value for this version decl. If PREDICATE_LIST
34260 is not NULL, it stores the list of cpu features that need to be checked
34261 before dispatching this function.
   NOTE(review): many interior lines (enum body, case breaks, gotos,
   returns, frees of tok_str) are elided in this extraction.  */
34263 static unsigned int
34264 get_builtin_code_for_version (tree decl, tree *predicate_list)
34267 struct cl_target_option cur_target;
34269 struct cl_target_option *new_target;
34270 const char *arg_str = NULL;
34271 const char *attrs_str = NULL;
34272 char *tok_str = NULL;
34275 /* Priority of i386 features, greater value is higher priority. This is
34276 used to decide the order in which function dispatch must happen. For
34277 instance, a version specialized for SSE4.2 should be checked for dispatch
34278 before a version for SSE3, as SSE4.2 implies SSE3. */
34279 enum feature_priority
34310 enum feature_priority priority = P_ZERO;
34312 /* These are the target attribute strings for which a dispatcher is
34313 available, from fold_builtin_cpu. */
34315 static struct _feature_list
34317 const char *const name;
34318 const enum feature_priority priority;
34320 const feature_list[] =
34326 {"sse4a", P_SSE4_A},
34327 {"ssse3", P_SSSE3},
34328 {"sse4.1", P_SSE4_1},
34329 {"sse4.2", P_SSE4_2},
34330 {"popcnt", P_POPCNT},
34338 {"avx512f", P_AVX512F}
34342 static unsigned int NUM_FEATURES
34343 = sizeof (feature_list) / sizeof (struct _feature_list);
34347 tree predicate_chain = NULL_TREE;
34348 tree predicate_decl, predicate_arg;
/* The target attribute wraps its string as
   TREE_VALUE (TREE_VALUE (attr)); it must be a STRING_CST.  */
34350 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34351 gcc_assert (attrs != NULL);
34353 attrs = TREE_VALUE (TREE_VALUE (attrs));
34355 gcc_assert (TREE_CODE (attrs) == STRING_CST);
34356 attrs_str = TREE_STRING_POINTER (attrs);
34358 /* Return priority zero for default function. */
34359 if (strcmp (attrs_str, "default") == 0)
34362 /* Handle arch= if specified. For priority, set it to be 1 more than
34363 the best instruction set the processor can handle. For instance, if
34364 there is a version for atom and a version for ssse3 (the highest ISA
34365 priority for atom), the atom version must be checked for dispatch
34366 before the ssse3 version. */
34367 if (strstr (attrs_str, "arch=") != NULL)
/* Parse the attribute against saved global options so the real
   global state is untouched (restored below).  */
34369 cl_target_option_save (&cur_target, &global_options);
34370 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34371 &global_options_set);
34373 gcc_assert (target_node);
34374 new_target = TREE_TARGET_OPTION (target_node);
34375 gcc_assert (new_target);
34377 if (new_target->arch_specified && new_target->arch > 0)
/* Map each PROCESSOR_* to the __builtin_cpu_is string and its
   dispatch priority.  Some entries use an ISA flag to pick the
   more specific micro-architecture name.  */
34379 switch (new_target->arch)
34381 case PROCESSOR_CORE2:
34383 priority = P_PROC_SSSE3;
34385 case PROCESSOR_NEHALEM:
34386 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34387 arg_str = "westmere";
34389 /* We translate "arch=corei7" and "arch=nehalem" to
34390 "corei7" so that it will be mapped to M_INTEL_COREI7
34391 as cpu type to cover all M_INTEL_COREI7_XXXs. */
34392 arg_str = "corei7";
34393 priority = P_PROC_SSE4_2;
34395 case PROCESSOR_SANDYBRIDGE:
34396 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34397 arg_str = "ivybridge";
34399 arg_str = "sandybridge";
34400 priority = P_PROC_AVX;
34402 case PROCESSOR_HASWELL:
34403 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34404 arg_str = "broadwell";
34406 arg_str = "haswell";
34407 priority = P_PROC_AVX2;
34409 case PROCESSOR_BONNELL:
34410 arg_str = "bonnell";
34411 priority = P_PROC_SSSE3;
34413 case PROCESSOR_KNL:
34415 priority = P_PROC_AVX512F;
34417 case PROCESSOR_SILVERMONT:
34418 arg_str = "silvermont";
34419 priority = P_PROC_SSE4_2;
34421 case PROCESSOR_AMDFAM10:
34422 arg_str = "amdfam10h";
34423 priority = P_PROC_SSE4_A;
34425 case PROCESSOR_BTVER1:
34426 arg_str = "btver1";
34427 priority = P_PROC_SSE4_A;
34429 case PROCESSOR_BTVER2:
34430 arg_str = "btver2";
34431 priority = P_PROC_BMI;
34433 case PROCESSOR_BDVER1:
34434 arg_str = "bdver1";
34435 priority = P_PROC_XOP;
34437 case PROCESSOR_BDVER2:
34438 arg_str = "bdver2";
34439 priority = P_PROC_FMA;
34441 case PROCESSOR_BDVER3:
34442 arg_str = "bdver3";
34443 priority = P_PROC_FMA;
34445 case PROCESSOR_BDVER4:
34446 arg_str = "bdver4";
34447 priority = P_PROC_AVX2;
/* Undo the temporary option parse done above.  */
34452 cl_target_option_restore (&global_options, &cur_target);
34454 if (predicate_list && arg_str == NULL)
34456 error_at (DECL_SOURCE_LOCATION (decl),
34457 "No dispatcher found for the versioning attributes");
/* Queue a __builtin_cpu_is ("<arch>") check for the resolver.  */
34461 if (predicate_list)
34463 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
34464 /* For a C string literal the length includes the trailing NULL. */
34465 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34466 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34471 /* Process feature name. */
/* Tokenize a writable copy of the attribute string; strtok mutates
   its input, so attrs_str itself is left intact.  */
34472 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34473 strcpy (tok_str, attrs_str);
34474 token = strtok (tok_str, ",");
34475 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34477 while (token != NULL)
34479 /* Do not process "arch=" */
34480 if (strncmp (token, "arch=", 5) == 0)
34482 token = strtok (NULL, ",");
/* Linear search of the supported-feature table; i == NUM_FEATURES
   afterwards means the token was not recognized.  */
34485 for (i = 0; i < NUM_FEATURES; ++i)
34487 if (strcmp (token, feature_list[i].name) == 0)
34489 if (predicate_list)
34491 predicate_arg = build_string_literal (
34492 strlen (feature_list[i].name) + 1,
34493 feature_list[i].name);
34494 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34497 /* Find the maximum priority feature. */
34498 if (feature_list[i].priority > priority)
34499 priority = feature_list[i].priority;
34504 if (predicate_list && i == NUM_FEATURES)
34506 error_at (DECL_SOURCE_LOCATION (decl),
34507 "No dispatcher found for %s", token);
34510 token = strtok (NULL, ",");
34514 if (predicate_list && predicate_chain == NULL_TREE)
34516 error_at (DECL_SOURCE_LOCATION (decl),
34517 "No dispatcher found for the versioning attributes : %s",
34521 else if (predicate_list)
/* Predicates were consed in reverse; restore attribute order before
   handing them back.  */
34523 predicate_chain = nreverse (predicate_chain);
34524 *predicate_list = predicate_chain;
34530 /* This compares the priority of target features in function DECL1
34531 and DECL2. It returns positive value if DECL1 is higher priority,
34532 negative value if DECL2 is higher priority and 0 if they are the
   same.  (Passing NULL as predicate_list means only the priority is
   computed, no predicate chain is built.)  */
34536 ix86_compare_version_priority (tree decl1, tree decl2)
34538 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34539 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
/* Cast to int before subtracting so the unsigned difference cannot
   wrap into a huge positive value.  */
34541 return (int)priority1 - (int)priority2;
34544 /* V1 and V2 point to function versions with different priorities
34545 based on the target ISA. This function compares their priorities.
   qsort comparator: returns c2 - c1, i.e. sorts in DESCENDING
   dispatch priority so the most specific version is checked first.  */
34548 feature_compare (const void *v1, const void *v2)
/* Local mirror of the _function_version_info struct declared in
   dispatch_function_versions; layouts must stay in sync.  */
34550 typedef struct _function_version_info
34553 tree predicate_chain;
34554 unsigned int dispatch_priority;
34555 } function_version_info;
34557 const function_version_info c1 = *(const function_version_info *)v1;
34558 const function_version_info c2 = *(const function_version_info *)v2;
34559 return (c2.dispatch_priority - c1.dispatch_priority);
34562 /* This function generates the dispatch function for
34563 multi-versioned functions. DISPATCH_DECL is the function which will
34564 contain the dispatch logic. FNDECLS are the function choices for
34565 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
34566 in DISPATCH_DECL in which the dispatch code is generated.
   NOTE(review): interior lines (braces, the version_decl iteration
   declarations, pop_cfun/return) are elided in this extraction.  */
34569 dispatch_function_versions (tree dispatch_decl,
34571 basic_block *empty_bb)
34574 gimple ifunc_cpu_init_stmt;
34578 vec<tree> *fndecls;
34579 unsigned int num_versions = 0;
34580 unsigned int actual_versions = 0;
/* Per-version record sorted by dispatch priority below; must match
   the layout mirrored in feature_compare.  */
34583 struct _function_version_info
34586 tree predicate_chain;
34587 unsigned int dispatch_priority;
34588 }*function_version_info;
34590 gcc_assert (dispatch_decl != NULL
34591 && fndecls_p != NULL
34592 && empty_bb != NULL);
34594 /*fndecls_p is actually a vector. */
34595 fndecls = static_cast<vec<tree> *> (fndecls_p);
34597 /* At least one more version other than the default. */
34598 num_versions = fndecls->length ();
34599 gcc_assert (num_versions >= 2);
/* num_versions - 1 slots: the default version (index 0) is not
   entered into the priority-sorted table.  */
34601 function_version_info = (struct _function_version_info *)
34602 XNEWVEC (struct _function_version_info, (num_versions - 1));
34604 /* The first version in the vector is the default decl. */
34605 default_decl = (*fndecls)[0];
34607 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34609 gseq = bb_seq (*empty_bb);
34610 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
34611 constructors, so explicity call __builtin_cpu_init here. */
34612 ifunc_cpu_init_stmt = gimple_build_call_vec (
34613 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34614 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34615 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34616 set_bb_seq (*empty_bb, gseq);
/* Collect every non-default version with its predicate chain and
   priority.  Iteration starts at 1 to skip the default decl.  */
34621 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34623 tree version_decl = ele;
34624 tree predicate_chain = NULL_TREE;
34625 unsigned int priority;
34626 /* Get attribute string, parse it and find the right predicate decl.
34627 The predicate function could be a lengthy combination of many
34628 features, like arch-type and various isa-variants. */
34629 priority = get_builtin_code_for_version (version_decl,
/* No predicates means this version is not dispatchable — the elided
   line presumably skips it.  TODO confirm.  */
34632 if (predicate_chain == NULL_TREE)
34635 function_version_info [actual_versions].version_decl = version_decl;
34636 function_version_info [actual_versions].predicate_chain
34638 function_version_info [actual_versions].dispatch_priority = priority;
34642 /* Sort the versions according to descending order of dispatch priority. The
34643 priority is based on the ISA. This is not a perfect solution. There
34644 could still be ambiguity. If more than one function version is suitable
34645 to execute, which one should be dispatched? In future, allow the user
34646 to specify a dispatch priority next to the version. */
34647 qsort (function_version_info, actual_versions,
34648 sizeof (struct _function_version_info), feature_compare);
/* Emit one guarded return per version, chaining each new condition
   onto the block returned by the previous call.  */
34650 for (i = 0; i < actual_versions; ++i)
34651 *empty_bb = add_condition_to_bb (dispatch_decl,
34652 function_version_info[i].version_decl,
34653 function_version_info[i].predicate_chain,
34656 /* dispatch default version at the end. */
34657 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34660 free (function_version_info);
34664 /* Comparator function to be used in qsort routine to sort attribute
34665 specification strings to "target".
   V1/V2 point at char* elements; compares the strings they point to.  */
34668 attr_strcmp (const void *v1, const void *v2)
34670 const char *c1 = *(char *const*)v1;
34671 const char *c2 = *(char *const*)v2;
34672 return strcmp (c1, c2);
34675 /* ARGLIST is the argument to target attribute. This function tokenizes
34676 the comma separated arguments, sorts them and returns a string which
34677 is a unique identifier for the comma separated arguments. It also
34678 replaces non-identifier characters "=,-" with "_".
   Caller owns the returned XNEWVEC'd string (XDELETEVEC when done).
   NOTE(review): several interior lines (argnum counting, the strtok
   body storing tokens into args, final return) are elided here.  */
34681 sorted_attr_string (tree arglist)
34684 size_t str_len_sum = 0;
34685 char **args = NULL;
34686 char *attr_str, *ret_str;
34688 unsigned int argnum = 1;
/* First pass: total length (plus separators) and — in the elided
   lines — a count of comma-separated arguments into argnum.  */
34691 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34693 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34694 size_t len = strlen (str);
34695 str_len_sum += len + 1;
34696 if (arg != arglist)
34698 for (i = 0; i < strlen (str); i++)
34703 attr_str = XNEWVEC (char, str_len_sum);
/* Second pass: concatenate all attribute strings, comma-separated,
   NUL-terminated after the last one.  str_len_sum is reused as the
   running write offset.  */
34705 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34707 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34708 size_t len = strlen (str);
34709 memcpy (attr_str + str_len_sum, str, len);
34710 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
34711 str_len_sum += len + 1;
34714 /* Replace "=,-" with "_". */
34715 for (i = 0; i < strlen (attr_str); i++)
34716 if (attr_str[i] == '=' || attr_str[i]== '-')
/* Tokenize on ',' and sort the tokens so semantically equal
   attribute lists produce identical identifiers.  */
34722 args = XNEWVEC (char *, argnum);
34725 attr = strtok (attr_str, ",");
34726 while (attr != NULL)
34730 attr = strtok (NULL, ",");
34733 qsort (args, argnum, sizeof (char *), attr_strcmp);
34735 ret_str = XNEWVEC (char, str_len_sum);
/* Join sorted tokens with '_' into the returned string.  */
34737 for (i = 0; i < argnum; i++)
34739 size_t len = strlen (args[i]);
34740 memcpy (ret_str + str_len_sum, args[i], len);
34741 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
34742 str_len_sum += len + 1;
34746 XDELETEVEC (attr_str);
34750 /* This function changes the assembler name for functions that are
34751 versions. If DECL is a function version and has a "target"
34752 attribute, it appends the attribute string to its assembler name.
   Returns ID unchanged for the "default" version (elided return),
   otherwise an identifier of the form "<name>.<sorted-attrs>".  */
34755 ix86_mangle_function_version_assembler_name (tree decl, tree id)
34758 const char *orig_name, *version_string;
34759 char *attr_str, *assembler_name;
34761 if (DECL_DECLARED_INLINE_P (decl)
34762 && lookup_attribute ("gnu_inline",
34763 DECL_ATTRIBUTES (decl)))
34764 error_at (DECL_SOURCE_LOCATION (decl),
34765 "Function versions cannot be marked as gnu_inline,"
34766 " bodies have to be generated");
34768 if (DECL_VIRTUAL_P (decl)
34769 || DECL_VINDEX (decl))
34770 sorry ("Virtual function multiversioning not supported");
34772 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34774 /* target attribute string cannot be NULL. */
34775 gcc_assert (version_attr != NULL_TREE);
34777 orig_name = IDENTIFIER_POINTER (id);
34779 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
/* The default version keeps its original assembler name.  */
34781 if (strcmp (version_string, "default") == 0)
/* '.' separator + terminating NUL accounts for the "+ 2".  */
34784 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
34785 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
34787 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
34789 /* Allow assembler name to be modified if already set. */
34790 if (DECL_ASSEMBLER_NAME_SET_P (decl))
34791 SET_DECL_RTL (decl, NULL);
34793 tree ret = get_identifier (assembler_name);
34794 XDELETEVEC (attr_str);
34795 XDELETEVEC (assembler_name);
34799 /* This function returns true if FN1 and FN2 are versions of the same function,
34800 that is, the target strings of the function decls are different. This assumes
34801 that FN1 and FN2 have the same signature.
   NOTE(review): several return statements and braces are elided in
   this extraction.  */
34804 ix86_function_versions (tree fn1, tree fn2)
34807 char *target1, *target2;
34810 if (TREE_CODE (fn1) != FUNCTION_DECL
34811 || TREE_CODE (fn2) != FUNCTION_DECL)
34814 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
34815 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
34817 /* At least one function decl should have the target attribute specified. */
34818 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
34821 /* Diagnose missing target attribute if one of the decls is already
34822 multi-versioned. */
34823 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
34825 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
/* Presumably the elided lines swap fn1/fn2 and attr1/attr2 here so
   fn2 is always the decl missing the attribute.  TODO confirm.  */
34827 if (attr2 != NULL_TREE)
34834 error_at (DECL_SOURCE_LOCATION (fn2),
34835 "missing %<target%> attribute for multi-versioned %D",
34837 inform (DECL_SOURCE_LOCATION (fn1),
34838 "previous declaration of %D", fn1);
34839 /* Prevent diagnosing of the same error multiple times. */
34840 DECL_ATTRIBUTES (fn2)
34841 = tree_cons (get_identifier ("target"),
34842 copy_node (TREE_VALUE (attr1)),
34843 DECL_ATTRIBUTES (fn2))
/* Compare the canonicalized (sorted) attribute strings: identical
   strings mean duplicates, not distinct versions.  */
34848 target1 = sorted_attr_string (TREE_VALUE (attr1));
34849 target2 = sorted_attr_string (TREE_VALUE (attr2));
34851 /* The sorted target strings must be different for fn1 and fn2
34853 if (strcmp (target1, target2) == 0)
34858 XDELETEVEC (target1);
34859 XDELETEVEC (target2);
/* Target hook: mangle DECL's assembler name ID.  Appends the target
   suffix for function versions, then lets the subtarget adjust it.  */
34865 ix86_mangle_decl_assembler_name (tree decl, tree id)
34867 /* For function version, add the target suffix to the assembler name. */
34868 if (TREE_CODE (decl) == FUNCTION_DECL
34869 && DECL_FUNCTION_VERSIONED (decl))
34870 id = ix86_mangle_function_version_assembler_name (decl, id);
34871 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
34872 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
34878 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
34879 is true, append the full path name of the source file.
   Returns an XNEWVEC'd string owned by the caller.  */
34882 make_name (tree decl, const char *suffix, bool make_unique)
34884 char *global_var_name;
34887 const char *unique_name = NULL;
34889 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
34891 /* Get a unique name that can be used globally without any chances
34892 of collision at link time. */
34894 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
/* Two extra chars: the '.' separator and the trailing NUL; one more
   '.' is added below when the unique part is included.  */
34896 name_len = strlen (name) + strlen (suffix) + 2;
34899 name_len += strlen (unique_name) + 1;
34900 global_var_name = XNEWVEC (char, name_len);
34902 /* Use '.' to concatenate names as it is demangler friendly. */
34904 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
34907 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
34909 return global_var_name;
34912 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
34914 /* Make a dispatcher declaration for the multi-versioned function DECL.
34915 Calls to DECL function will be replaced with calls to the dispatcher
34916 by the front-end. Return the decl created.
   The dispatcher shares DECL's signature and is later tagged with an
   "ifunc" attribute pointing at the resolver.  */
34919 make_dispatcher_decl (const tree decl)
34923 tree fn_type, func_type;
34924 bool is_uniq = false;
/* Non-public versions need a file-unique dispatcher name, since the
   IFUNC symbol itself must be externally visible.  */
34926 if (TREE_PUBLIC (decl) == 0)
34929 func_name = make_name (decl, "ifunc", is_uniq);
34931 fn_type = TREE_TYPE (decl);
34932 func_type = build_function_type (TREE_TYPE (fn_type),
34933 TYPE_ARG_TYPES (fn_type));
34935 func_decl = build_fn_decl (func_name, func_type);
34936 XDELETEVEC (func_name);
34937 TREE_USED (func_decl) = 1;
34938 DECL_CONTEXT (func_decl) = NULL_TREE;
34939 DECL_INITIAL (func_decl) = error_mark_node;
34940 DECL_ARTIFICIAL (func_decl) = 1;
34941 /* Mark this func as external, the resolver will flip it again if
34942 it gets generated. */
34943 DECL_EXTERNAL (func_decl) = 1;
34944 /* This will be of type IFUNCs have to be externally visible. */
34945 TREE_PUBLIC (func_decl) = 1;
34952 /* Returns true if decl is multi-versioned and DECL is the default function,
34953 that is it is not tagged with target specific optimization.
   The default version carries target("default").  */
34956 is_function_default_version (const tree decl)
34958 if (TREE_CODE (decl) != FUNCTION_DECL
34959 || !DECL_FUNCTION_VERSIONED (decl))
34961 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
/* Unwrap the attribute argument list to its STRING_CST payload.  */
34963 attr = TREE_VALUE (TREE_VALUE (attr));
34964 return (TREE_CODE (attr) == STRING_CST
34965 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
34968 /* Make a dispatcher declaration for the multi-versioned function DECL.
34969 Calls to DECL function will be replaced with calls to the dispatcher
34970 by the front-end. Returns the decl of the dispatcher function.
   Finds the default version, moves it to the head of the version
   chain, creates the IFUNC dispatcher node and records it as the
   dispatcher_resolver of every version.  */
34973 ix86_get_function_versions_dispatcher (void *decl)
34975 tree fn = (tree) decl;
34976 struct cgraph_node *node = NULL;
34977 struct cgraph_node *default_node = NULL;
34978 struct cgraph_function_version_info *node_v = NULL;
34979 struct cgraph_function_version_info *first_v = NULL;
34981 tree dispatch_decl = NULL;
34983 struct cgraph_function_version_info *default_version_info = NULL;
34985 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
34987 node = cgraph_node::get (fn);
34988 gcc_assert (node != NULL);
34990 node_v = node->function_version ();
34991 gcc_assert (node_v != NULL);
/* Already built for a sibling version — reuse it.  */
34993 if (node_v->dispatcher_resolver != NULL)
34994 return node_v->dispatcher_resolver;
34996 /* Find the default version and make it the first node. */
34998 /* Go to the beginning of the chain. */
34999 while (first_v->prev != NULL)
35000 first_v = first_v->prev;
35001 default_version_info = first_v;
/* Walk forward until a version tagged target("default") is found.  */
35002 while (default_version_info != NULL)
35004 if (is_function_default_version
35005 (default_version_info->this_node->decl))
35007 default_version_info = default_version_info->next;
35010 /* If there is no default node, just return NULL. */
35011 if (default_version_info == NULL)
35014 /* Make default info the first node. */
/* Unlink the default from its position and splice it before the old
   head of the doubly-linked version chain.  */
35015 if (first_v != default_version_info)
35017 default_version_info->prev->next = default_version_info->next;
35018 if (default_version_info->next)
35019 default_version_info->next->prev = default_version_info->prev;
35020 first_v->prev = default_version_info;
35021 default_version_info->next = first_v;
35022 default_version_info->prev = NULL;
35025 default_node = default_version_info->this_node;
35027 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35028 if (targetm.has_ifunc_p ())
35030 struct cgraph_function_version_info *it_v = NULL;
35031 struct cgraph_node *dispatcher_node = NULL;
35032 struct cgraph_function_version_info *dispatcher_version_info = NULL;
35034 /* Right now, the dispatching is done via ifunc. */
35035 dispatch_decl = make_dispatcher_decl (default_node->decl);
35037 dispatcher_node = cgraph_node::get_create (dispatch_decl);
35038 gcc_assert (dispatcher_node != NULL);
35039 dispatcher_node->dispatcher_function = 1;
35040 dispatcher_version_info
35041 = dispatcher_node->insert_new_function_version ();
35042 dispatcher_version_info->next = default_version_info;
35043 dispatcher_node->definition = 1;
35045 /* Set the dispatcher for all the versions. */
35046 it_v = default_version_info;
35047 while (it_v != NULL)
35049 it_v->dispatcher_resolver = dispatch_decl;
/* No IFUNC support on this target: multiversioning cannot work.  */
35056 error_at (DECL_SOURCE_LOCATION (default_node->decl),
35057 "multiversioning needs ifunc which is not supported "
35061 return dispatch_decl;
35064 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
   it onto CHAIN; returns the new attribute list head (elided return).  */
35068 make_attribute (const char *name, const char *arg_name, tree chain)
35071 tree attr_arg_name;
35075 attr_name = get_identifier (name);
35076 attr_arg_name = build_string (strlen (arg_name), arg_name);
35077 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
35078 attr = tree_cons (attr_name, attr_args, chain);
35082 /* Make the resolver function decl to dispatch the versions of
35083 a multi-versioned function, DEFAULT_DECL. Create an
35084 empty basic block in the resolver and store the pointer in
35085 EMPTY_BB. Return the decl of the resolver function.
   Also attaches ifunc("<resolver>") to DISPATCH_DECL and aliases
   the dispatcher to the resolver.  */
35088 make_resolver_func (const tree default_decl,
35089 const tree dispatch_decl,
35090 basic_block *empty_bb)
35092 char *resolver_name;
35093 tree decl, type, decl_name, t;
35094 bool is_uniq = false;
35096 /* IFUNC's have to be globally visible. So, if the default_decl is
35097 not, then the name of the IFUNC should be made unique. */
35098 if (TREE_PUBLIC (default_decl) == 0)
35101 /* Append the filename to the resolver function if the versions are
35102 not externally visible. This is because the resolver function has
35103 to be externally visible for the loader to find it. So, appending
35104 the filename will prevent conflicts with a resolver function from
35105 another module which is based on the same version name. */
35106 resolver_name = make_name (default_decl, "resolver", is_uniq);
35108 /* The resolver function should return a (void *). */
35109 type = build_function_type_list (ptr_type_node, NULL_TREE);
35111 decl = build_fn_decl (resolver_name, type);
35112 decl_name = get_identifier (resolver_name);
35113 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
35115 DECL_NAME (decl) = decl_name;
35116 TREE_USED (decl) = 1;
35117 DECL_ARTIFICIAL (decl) = 1;
35118 DECL_IGNORED_P (decl) = 0;
35119 /* IFUNC resolvers have to be externally visible. */
35120 TREE_PUBLIC (decl) = 1;
35121 DECL_UNINLINABLE (decl) = 1;
35123 /* Resolver is not external, body is generated. */
35124 DECL_EXTERNAL (decl) = 0;
35125 DECL_EXTERNAL (dispatch_decl) = 0;
35127 DECL_CONTEXT (decl) = NULL_TREE;
35128 DECL_INITIAL (decl) = make_node (BLOCK);
35129 DECL_STATIC_CONSTRUCTOR (decl) = 0;
35131 if (DECL_COMDAT_GROUP (default_decl)
35132 || TREE_PUBLIC (default_decl))
35134 /* In this case, each translation unit with a call to this
35135 versioned function will put out a resolver. Ensure it
35136 is comdat to keep just one copy. */
35137 DECL_COMDAT (decl) = 1;
35138 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
35140 /* Build result decl and add to function_decl. */
35141 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
35142 DECL_ARTIFICIAL (t) = 1;
35143 DECL_IGNORED_P (t) = 1;
35144 DECL_RESULT (decl) = t;
/* Lower the (empty) body and hand the caller a basic block to fill
   with dispatch code.  */
35146 gimplify_function_tree (decl);
35147 push_cfun (DECL_STRUCT_FUNCTION (decl));
35148 *empty_bb = init_lowered_empty_function (decl, false);
35150 cgraph_node::add_new_function (decl, true);
35151 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
35155 gcc_assert (dispatch_decl != NULL);
35156 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
35157 DECL_ATTRIBUTES (dispatch_decl)
35158 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
35160 /* Create the alias for dispatch to resolver here. */
35161 /*cgraph_create_function_alias (dispatch_decl, decl);*/
35162 cgraph_node::create_same_body_alias (dispatch_decl, decl);
35163 XDELETEVEC (resolver_name);
35167 /* Generate the dispatching code body to dispatch multi-versioned function
35168 DECL. The target hook is called to process the "target" attributes and
35169 provide the code to dispatch the right function at run-time. NODE points
35170 to the dispatcher decl whose body will be created.
   Returns the resolver decl (memoized in dispatcher_resolver).  */
35173 ix86_generate_version_dispatcher_body (void *node_p)
35175 tree resolver_decl;
35176 basic_block empty_bb;
35177 tree default_ver_decl;
35178 struct cgraph_node *versn;
35179 struct cgraph_node *node;
35181 struct cgraph_function_version_info *node_version_info = NULL;
35182 struct cgraph_function_version_info *versn_info = NULL;
35184 node = (cgraph_node *)node_p;
35186 node_version_info = node->function_version ();
35187 gcc_assert (node->dispatcher_function
35188 && node_version_info != NULL);
/* Body already generated for this dispatcher — return the cached
   resolver.  */
35190 if (node_version_info->dispatcher_resolver)
35191 return node_version_info->dispatcher_resolver;
35193 /* The first version in the chain corresponds to the default version. */
35194 default_ver_decl = node_version_info->next->this_node->decl;
35196 /* node is going to be an alias, so remove the finalized bit. */
35197 node->definition = false;
35199 resolver_decl = make_resolver_func (default_ver_decl,
35200 node->decl, &empty_bb);
35202 node_version_info->dispatcher_resolver = resolver_decl;
35204 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
/* Collect the decls of all versions; slot 0 is the default.  */
35206 auto_vec<tree, 2> fn_ver_vec;
35208 for (versn_info = node_version_info->next; versn_info;
35209 versn_info = versn_info->next)
35211 versn = versn_info->this_node;
35212 /* Check for virtual functions here again, as by this time it should
35213 have been determined if this function needs a vtable index or
35214 not. This happens for methods in derived classes that override
35215 virtual methods in base classes but are not explicitly marked as
35217 if (DECL_VINDEX (versn->decl))
35218 sorry ("Virtual function multiversioning not supported");
35220 fn_ver_vec.safe_push (versn->decl);
/* Fill the resolver body and rebuild call-graph edges for the newly
   emitted calls.  */
35223 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
35224 cgraph_edge::rebuild_edges ();
35226 return resolver_decl;
35228 /* This builds the processor_model struct type defined in
35229 libgcc/config/i386/cpuinfo.c */
/* NOTE(review): lossy extraction — embedded numbers are original line
   numbers; the `i` declaration, braces, field-name tail and the final
   `return type;` are among the missing lines.  */
35232 build_processor_model_struct (void)
35234 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
35236 tree field = NULL_TREE, field_chain = NULL_TREE;
35238 tree type = make_node (RECORD_TYPE);
35240 /* The first 3 fields are unsigned int. */
35241 for (i = 0; i < 3; ++i)
35243 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35244 get_identifier (field_name[i]), unsigned_type_node);
/* Fields are chained newest-first; finish_builtin_struct takes the
   chain head below.  */
35245 if (field_chain != NULL_TREE)
35246 DECL_CHAIN (field) = field_chain;
35247 field_chain = field;
35250 /* The last field is an array of unsigned integers of size one. */
35251 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35252 get_identifier (field_name[3]),
35253 build_array_type (unsigned_type_node,
35254 build_index_type (size_one_node)));
35255 if (field_chain != NULL_TREE)
35256 DECL_CHAIN (field) = field_chain;
35257 field_chain = field;
35259 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35263 /* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */
/* NOTE(review): lossy extraction — VAR_DECL argument line, the
   `new_decl` declaration and the return statement are missing here.  */
35266 make_var_decl (tree type, const char *name)
35270 new_decl = build_decl (UNKNOWN_LOCATION,
35272 get_identifier(name),
/* External + static + public: references a definition provided by
   libgcc; DECL_PRESERVE_P keeps it from being discarded.  */
35275 DECL_EXTERNAL (new_decl) = 1;
35276 TREE_STATIC (new_decl) = 1;
35277 TREE_PUBLIC (new_decl) = 1;
35278 DECL_INITIAL (new_decl) = 0;
35279 DECL_ARTIFICIAL (new_decl) = 0;
35280 DECL_PRESERVE_P (new_decl) = 1;
/* Comdat/one-only so multiple TUs referencing it merge cleanly.  */
35282 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35283 assemble_variable (new_decl, 0, 0, 0);
35288 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
35289 into an integer defined in libgcc/config/i386/cpuinfo.c */
/* NOTE(review): lossy extraction — embedded numbers are original line
   numbers; parts of both enums, both table initializers and several
   declarations (i, field, ref, final, array_elt) are missing.  The enum
   values and table entries MUST stay in sync with
   libgcc/config/i386/cpuinfo.c — do not reorder.  */
35292 fold_builtin_cpu (tree fndecl, tree *args)
35295 enum ix86_builtins fn_code = (enum ix86_builtins)
35296 DECL_FUNCTION_CODE (fndecl);
35297 tree param_string_cst = NULL;
35299 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
35300 enum processor_features
35323 /* These are the values for vendor types and cpu types and subtypes
35324 in cpuinfo.c. Cpu types and subtypes should be subtracted by
35325 the corresponding start value. */
35326 enum processor_model
35336 M_INTEL_SILVERMONT,
35340 M_CPU_SUBTYPE_START,
35341 M_INTEL_COREI7_NEHALEM,
35342 M_INTEL_COREI7_WESTMERE,
35343 M_INTEL_COREI7_SANDYBRIDGE,
35344 M_AMDFAM10H_BARCELONA,
35345 M_AMDFAM10H_SHANGHAI,
35346 M_AMDFAM10H_ISTANBUL,
35347 M_AMDFAM15H_BDVER1,
35348 M_AMDFAM15H_BDVER2,
35349 M_AMDFAM15H_BDVER3,
35350 M_AMDFAM15H_BDVER4,
35351 M_INTEL_COREI7_IVYBRIDGE,
35352 M_INTEL_COREI7_HASWELL,
35353 M_INTEL_COREI7_BROADWELL
/* Maps the strings accepted by __builtin_cpu_is to processor_model
   values.  Several strings alias the same model ("atom"/"bonnell",
   "slm"/"silvermont").  */
35356 static struct _arch_names_table
35358 const char *const name;
35359 const enum processor_model model;
35361 const arch_names_table[] =
35364 {"intel", M_INTEL},
35365 {"atom", M_INTEL_BONNELL},
35366 {"slm", M_INTEL_SILVERMONT},
35367 {"core2", M_INTEL_CORE2},
35368 {"corei7", M_INTEL_COREI7},
35369 {"nehalem", M_INTEL_COREI7_NEHALEM},
35370 {"westmere", M_INTEL_COREI7_WESTMERE},
35371 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35372 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35373 {"haswell", M_INTEL_COREI7_HASWELL},
35374 {"broadwell", M_INTEL_COREI7_BROADWELL},
35375 {"bonnell", M_INTEL_BONNELL},
35376 {"silvermont", M_INTEL_SILVERMONT},
35377 {"knl", M_INTEL_KNL},
35378 {"amdfam10h", M_AMDFAM10H},
35379 {"barcelona", M_AMDFAM10H_BARCELONA},
35380 {"shanghai", M_AMDFAM10H_SHANGHAI},
35381 {"istanbul", M_AMDFAM10H_ISTANBUL},
35382 {"btver1", M_AMD_BTVER1},
35383 {"amdfam15h", M_AMDFAM15H},
35384 {"bdver1", M_AMDFAM15H_BDVER1},
35385 {"bdver2", M_AMDFAM15H_BDVER2},
35386 {"bdver3", M_AMDFAM15H_BDVER3},
35387 {"bdver4", M_AMDFAM15H_BDVER4},
35388 {"btver2", M_AMD_BTVER2},
/* Maps the strings accepted by __builtin_cpu_supports to feature bit
   indices (the feature value is the shift count, see 1 << ... below).  */
35391 static struct _isa_names_table
35393 const char *const name;
35394 const enum processor_features feature;
35396 const isa_names_table[] =
35400 {"popcnt", F_POPCNT},
35404 {"ssse3", F_SSSE3},
35405 {"sse4a", F_SSE4_A},
35406 {"sse4.1", F_SSE4_1},
35407 {"sse4.2", F_SSE4_2},
35413 {"avx512f",F_AVX512F},
/* Build the __processor_model type and a reference to the __cpu_model
   variable that libgcc defines and initializes.  */
35418 tree __processor_model_type = build_processor_model_struct ();
35419 tree __cpu_model_var = make_var_decl (__processor_model_type,
35423 varpool_node::add (__cpu_model_var);
35425 gcc_assert ((args != NULL) && (*args != NULL));
/* Peel wrapper expressions until the STRING_CST argument is reached;
   a non-expression node on the way is a user error.  */
35427 param_string_cst = *args;
35428 while (param_string_cst
35429 && TREE_CODE (param_string_cst) != STRING_CST)
35431 /* *args must be a expr that can contain other EXPRS leading to a
35433 if (!EXPR_P (param_string_cst))
35435 error ("Parameter to builtin must be a string constant or literal");
35436 return integer_zero_node;
35438 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35441 gcc_assert (param_string_cst);
35443 if (fn_code == IX86_BUILTIN_CPU_IS)
35449 unsigned int field_val = 0;
35450 unsigned int NUM_ARCH_NAMES
35451 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
35453 for (i = 0; i < NUM_ARCH_NAMES; i++)
35454 if (strcmp (arch_names_table[i].name,
35455 TREE_STRING_POINTER (param_string_cst)) == 0)
35458 if (i == NUM_ARCH_NAMES)
35460 error ("Parameter to builtin not valid: %s",
35461 TREE_STRING_POINTER (param_string_cst));
35462 return integer_zero_node;
/* Select the struct field to compare against: vendor is the first
   field, cpu type the second, cpu subtype the third; type/subtype
   values are rebased by their start marker.  */
35465 field = TYPE_FIELDS (__processor_model_type);
35466 field_val = arch_names_table[i].model;
35468 /* CPU types are stored in the next field. */
35469 if (field_val > M_CPU_TYPE_START
35470 && field_val < M_CPU_SUBTYPE_START)
35472 field = DECL_CHAIN (field);
35473 field_val -= M_CPU_TYPE_START;
35476 /* CPU subtypes are stored in the next field. */
35477 if (field_val > M_CPU_SUBTYPE_START)
35479 field = DECL_CHAIN ( DECL_CHAIN (field));
35480 field_val -= M_CPU_SUBTYPE_START;
35483 /* Get the appropriate field in __cpu_model. */
35484 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35487 /* Check the value. */
35488 final = build2 (EQ_EXPR, unsigned_type_node, ref,
35489 build_int_cstu (unsigned_type_node, field_val));
35490 return build1 (CONVERT_EXPR, integer_type_node, final);
35492 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35499 unsigned int field_val = 0;
35500 unsigned int NUM_ISA_NAMES
35501 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35503 for (i = 0; i < NUM_ISA_NAMES; i++)
35504 if (strcmp (isa_names_table[i].name,
35505 TREE_STRING_POINTER (param_string_cst)) == 0)
35508 if (i == NUM_ISA_NAMES)
35510 error ("Parameter to builtin not valid: %s",
35511 TREE_STRING_POINTER (param_string_cst));
35512 return integer_zero_node;
35515 field = TYPE_FIELDS (__processor_model_type);
35516 /* Get the last field, which is __cpu_features. */
35517 while (DECL_CHAIN (field))
35518 field = DECL_CHAIN (field);
35520 /* Get the appropriate field: __cpu_model.__cpu_features */
35521 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35524 /* Access the 0th element of __cpu_features array. */
35525 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35526 integer_zero_node, NULL_TREE, NULL_TREE);
35528 field_val = (1 << isa_names_table[i].feature);
35529 /* Return __cpu_model.__cpu_features[0] & field_val */
35530 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35531 build_int_cstu (unsigned_type_node, field_val));
35532 return build1 (CONVERT_EXPR, integer_type_node, final);
/* Only CPU_IS and CPU_SUPPORTS reach this function.  */
35534 gcc_unreachable ();
/* Target hook: fold the machine-dependent __builtin_cpu_is /
   __builtin_cpu_supports calls at the tree level via fold_builtin_cpu;
   everything else falls through (lossy extraction — the return type,
   braces and the final fallback return are missing from this view).  */
35538 ix86_fold_builtin (tree fndecl, int n_args,
35539 tree *args, bool ignore ATTRIBUTE_UNUSED)
35541 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35543 enum ix86_builtins fn_code = (enum ix86_builtins)
35544 DECL_FUNCTION_CODE (fndecl);
35545 if (fn_code == IX86_BUILTIN_CPU_IS
35546 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35548 gcc_assert (n_args == 1);
35549 return fold_builtin_cpu (fndecl, args);
35553 #ifdef SUBTARGET_FOLD_BUILTIN
35554 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35560 /* Make builtins to detect cpu type and features supported. NAME is
35561 the builtin name, CODE is the builtin code, and FTYPE is the function
35562 type of the builtin. */
/* NOTE(review): lossy extraction — the `tree decl; tree type;`
   declarations and the closing brace are not visible here.  */
35565 make_cpu_type_builtin (const char* name, int code,
35566 enum ix86_builtin_func_type ftype, bool is_const)
35571 type = ix86_get_builtin_func_type (ftype);
35572 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35574 gcc_assert (decl != NULL_TREE);
/* Record the decl in the target's builtin table and mark it pure/const
   when requested so calls can be CSEd.  */
35575 ix86_builtins[(int) code] = decl;
35576 TREE_READONLY (decl) = is_const;
35579 /* Make builtins to get CPU type and features supported. The created
35582 __builtin_cpu_init (), to detect cpu type and features,
35583 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
35584 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
/* Registers the three platform-detection builtins; cpu_is and
   cpu_supports are marked const (is_const = true), cpu_init is not.  */
35588 ix86_init_platform_type_builtins (void)
35590 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35591 INT_FTYPE_VOID, false);
35592 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35593 INT_FTYPE_PCCHAR, true);
35594 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35595 INT_FTYPE_PCCHAR, true);
35598 /* Internal method for ix86_init_builtins. */
/* Registers ABI-specific va_start/va_end/va_copy builtins for both the
   Microsoft (ms_abi) and System V (sysv_abi) calling conventions.
   NOTE(review): lossy extraction — several lines (e.g. the
   fnvoid_va_end_ms assignment head, sysv_va_ref lhs) are missing.  */
35601 ix86_init_builtins_va_builtins_abi (void)
35603 tree ms_va_ref, sysv_va_ref;
35604 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35605 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35606 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35607 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
/* Each builtin carries the matching calling-convention attribute.  */
35611 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35612 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35613 ms_va_ref = build_reference_type (ms_va_list_type_node);
35615 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
35618 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35619 fnvoid_va_start_ms =
35620 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35621 fnvoid_va_end_sysv =
35622 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35623 fnvoid_va_start_sysv =
35624 build_varargs_function_type_list (void_type_node, sysv_va_ref,
35626 fnvoid_va_copy_ms =
35627 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35629 fnvoid_va_copy_sysv =
35630 build_function_type_list (void_type_node, sysv_va_ref,
35631 sysv_va_ref, NULL_TREE);
35633 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35634 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35635 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35636 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35637 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35638 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35639 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35640 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35641 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35642 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35643 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35644 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Registers the x86-specific __float80 and __float128 types with the
   front end.  __float80 reuses long double when it is already XFmode;
   otherwise a fresh 80-bit REAL_TYPE is built.  */
35648 ix86_init_builtin_types (void)
35650 tree float128_type_node, float80_type_node;
35652 /* The __float80 type. */
35653 float80_type_node = long_double_type_node;
35654 if (TYPE_MODE (float80_type_node) != XFmode)
35656 /* The __float80 type. */
35657 float80_type_node = make_node (REAL_TYPE);
35659 TYPE_PRECISION (float80_type_node) = 80;
35660 layout_type (float80_type_node);
35662 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35664 /* The __float128 type. */
35665 float128_type_node = make_node (REAL_TYPE);
35666 TYPE_PRECISION (float128_type_node) = 128;
35667 layout_type (float128_type_node);
35668 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35670 /* This macro is built by i386-builtin-types.awk. */
35671 DEFINE_BUILTIN_PRIMITIVE_TYPES;
/* Target hook: set up every machine-dependent builtin — builtin types,
   CPU-detection builtins, TFmode helpers, TM/MMX/SSE/MPX builtins and
   (condition line missing in this extraction) the ms/sysv va builtins.  */
35675 ix86_init_builtins (void)
35679 ix86_init_builtin_types ();
35681 /* Builtins to get CPU type and features. */
35682 ix86_init_platform_type_builtins ();
35684 /* TFmode support builtins. */
35685 def_builtin_const (0, "__builtin_infq",
35686 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
35687 def_builtin_const (0, "__builtin_huge_valq",
35688 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
35690 /* We will expand them to normal call if SSE isn't available since
35691 they are used by libgcc. */
35692 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
35693 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
35694 BUILT_IN_MD, "__fabstf2", NULL_TREE);
35695 TREE_READONLY (t) = 1;
35696 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
35698 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
35699 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
35700 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
35701 TREE_READONLY (t) = 1;
35702 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
35704 ix86_init_tm_builtins ();
35705 ix86_init_mmx_sse_builtins ();
35706 ix86_init_mpx_builtins ();
35709 ix86_init_builtins_va_builtins_abi ();
35711 #ifdef SUBTARGET_INIT_BUILTINS
35712 SUBTARGET_INIT_BUILTINS;
35716 /* Return the ix86 builtin for CODE. */
/* Out-of-range codes yield error_mark_node rather than crashing.  */
35719 ix86_builtin_decl (unsigned code, bool)
35721 if (code >= IX86_BUILTIN_MAX)
35722 return error_mark_node;
35724 return ix86_builtins[code];
35727 /* Errors in the source file can cause expand_expr to return const0_rtx
35728 where we expect a vector. To avoid crashing, use one of the vector
35729 clear instructions. */
/* Replaces a scalar const0_rtx with the all-zero vector of MODE.
   (The `return x;` line is missing from this lossy extraction.)  */
35731 safe_vector_operand (rtx x, machine_mode mode)
35733 if (x == const0_rtx)
35734 x = CONST0_RTX (mode);
35738 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* NOTE(review): lossy extraction — return type, braces, the final
   emit/return and the `rtx pat;` declaration are missing here.  */
35741 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
35744 tree arg0 = CALL_EXPR_ARG (exp, 0);
35745 tree arg1 = CALL_EXPR_ARG (exp, 1);
35746 rtx op0 = expand_normal (arg0);
35747 rtx op1 = expand_normal (arg1);
35748 machine_mode tmode = insn_data[icode].operand[0].mode;
35749 machine_mode mode0 = insn_data[icode].operand[1].mode;
35750 machine_mode mode1 = insn_data[icode].operand[2].mode;
35752 if (VECTOR_MODE_P (mode0))
35753 op0 = safe_vector_operand (op0, mode0);
35754 if (VECTOR_MODE_P (mode1))
35755 op1 = safe_vector_operand (op1, mode1);
/* Reuse TARGET only when it matches the pattern's output constraints;
   otherwise allocate a fresh pseudo.  */
35757 if (optimize || !target
35758 || GET_MODE (target) != tmode
35759 || !insn_data[icode].operand[0].predicate (target, tmode))
35760 target = gen_reg_rtx (tmode);
/* Special case: widen an SImode second operand to TImode by loading it
   into the low element of a V4SI register.  */
35762 if (GET_MODE (op1) == SImode && mode1 == TImode)
35764 rtx x = gen_reg_rtx (V4SImode);
35765 emit_insn (gen_sse2_loadd (x, op1));
35766 op1 = gen_lowpart (TImode, x);
35769 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35770 op0 = copy_to_mode_reg (mode0, op0);
35771 if (!insn_data[icode].operand[2].predicate (op1, mode1))
35772 op1 = copy_to_mode_reg (mode1, op1);
35774 pat = GEN_FCN (icode) (target, op0, op1);
35783 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* Expands XOP/multi-argument builtins.  M_TYPE selects the argument
   count and whether the last argument is a constant or the insn is a
   comparison; SUB_CODE supplies the rtx comparison/sub-operation code.
   NOTE(review): lossy extraction — nargs assignments inside the switch
   arms, several declarations (pat, args[], i, nargs) and the final
   emit/return are among the missing lines.  */
35786 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
35787 enum ix86_builtin_func_type m_type,
35788 enum rtx_code sub_code)
35793 bool comparison_p = false;
35795 bool last_arg_constant = false;
35796 int num_memory = 0;
35802 machine_mode tmode = insn_data[icode].operand[0].mode;
/* 4-operand forms whose last operand must be an immediate.  */
35806 case MULTI_ARG_4_DF2_DI_I:
35807 case MULTI_ARG_4_DF2_DI_I1:
35808 case MULTI_ARG_4_SF2_SI_I:
35809 case MULTI_ARG_4_SF2_SI_I1:
35811 last_arg_constant = true;
35814 case MULTI_ARG_3_SF:
35815 case MULTI_ARG_3_DF:
35816 case MULTI_ARG_3_SF2:
35817 case MULTI_ARG_3_DF2:
35818 case MULTI_ARG_3_DI:
35819 case MULTI_ARG_3_SI:
35820 case MULTI_ARG_3_SI_DI:
35821 case MULTI_ARG_3_HI:
35822 case MULTI_ARG_3_HI_SI:
35823 case MULTI_ARG_3_QI:
35824 case MULTI_ARG_3_DI2:
35825 case MULTI_ARG_3_SI2:
35826 case MULTI_ARG_3_HI2:
35827 case MULTI_ARG_3_QI2:
35831 case MULTI_ARG_2_SF:
35832 case MULTI_ARG_2_DF:
35833 case MULTI_ARG_2_DI:
35834 case MULTI_ARG_2_SI:
35835 case MULTI_ARG_2_HI:
35836 case MULTI_ARG_2_QI:
/* 2-operand forms with an immediate last operand.  */
35840 case MULTI_ARG_2_DI_IMM:
35841 case MULTI_ARG_2_SI_IMM:
35842 case MULTI_ARG_2_HI_IMM:
35843 case MULTI_ARG_2_QI_IMM:
35845 last_arg_constant = true;
35848 case MULTI_ARG_1_SF:
35849 case MULTI_ARG_1_DF:
35850 case MULTI_ARG_1_SF2:
35851 case MULTI_ARG_1_DF2:
35852 case MULTI_ARG_1_DI:
35853 case MULTI_ARG_1_SI:
35854 case MULTI_ARG_1_HI:
35855 case MULTI_ARG_1_QI:
35856 case MULTI_ARG_1_SI_DI:
35857 case MULTI_ARG_1_HI_DI:
35858 case MULTI_ARG_1_HI_SI:
35859 case MULTI_ARG_1_QI_DI:
35860 case MULTI_ARG_1_QI_SI:
35861 case MULTI_ARG_1_QI_HI:
/* Comparison forms: SUB_CODE is embedded as an extra rtx operand.  */
35865 case MULTI_ARG_2_DI_CMP:
35866 case MULTI_ARG_2_SI_CMP:
35867 case MULTI_ARG_2_HI_CMP:
35868 case MULTI_ARG_2_QI_CMP:
35870 comparison_p = true;
35873 case MULTI_ARG_2_SF_TF:
35874 case MULTI_ARG_2_DF_TF:
35875 case MULTI_ARG_2_DI_TF:
35876 case MULTI_ARG_2_SI_TF:
35877 case MULTI_ARG_2_HI_TF:
35878 case MULTI_ARG_2_QI_TF:
35884 gcc_unreachable ();
35887 if (optimize || !target
35888 || GET_MODE (target) != tmode
35889 || !insn_data[icode].operand[0].predicate (target, tmode))
35890 target = gen_reg_rtx (tmode);
35892 gcc_assert (nargs <= 4);
/* Expand and legitimize each call argument; comparison forms shift the
   operand index by one because of the embedded comparison rtx.  */
35894 for (i = 0; i < nargs; i++)
35896 tree arg = CALL_EXPR_ARG (exp, i);
35897 rtx op = expand_normal (arg);
35898 int adjust = (comparison_p) ? 1 : 0;
35899 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
35901 if (last_arg_constant && i == nargs - 1)
35903 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
35905 enum insn_code new_icode = icode;
35908 case CODE_FOR_xop_vpermil2v2df3:
35909 case CODE_FOR_xop_vpermil2v4sf3:
35910 case CODE_FOR_xop_vpermil2v4df3:
35911 case CODE_FOR_xop_vpermil2v8sf3:
35912 error ("the last argument must be a 2-bit immediate");
35913 return gen_reg_rtx (tmode);
/* Non-constant rotate counts fall back to the generic rotate
   patterns with the same operand layout.  */
35914 case CODE_FOR_xop_rotlv2di3:
35915 new_icode = CODE_FOR_rotlv2di3;
35917 case CODE_FOR_xop_rotlv4si3:
35918 new_icode = CODE_FOR_rotlv4si3;
35920 case CODE_FOR_xop_rotlv8hi3:
35921 new_icode = CODE_FOR_rotlv8hi3;
35923 case CODE_FOR_xop_rotlv16qi3:
35924 new_icode = CODE_FOR_rotlv16qi3;
/* Mask a constant rotate count to the element width.  */
35926 if (CONST_INT_P (op))
35928 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
35929 op = GEN_INT (INTVAL (op) & mask);
35930 gcc_checking_assert
35931 (insn_data[icode].operand[i + 1].predicate (op, mode));
35935 gcc_checking_assert
35937 && insn_data[new_icode].operand[0].mode == tmode
35938 && insn_data[new_icode].operand[1].mode == tmode
35939 && insn_data[new_icode].operand[2].mode == mode
35940 && insn_data[new_icode].operand[0].predicate
35941 == insn_data[icode].operand[0].predicate
35942 && insn_data[new_icode].operand[1].predicate
35943 == insn_data[icode].operand[1].predicate);
35949 gcc_unreachable ();
35956 if (VECTOR_MODE_P (mode))
35957 op = safe_vector_operand (op, mode);
35959 /* If we aren't optimizing, only allow one memory operand to be
35961 if (memory_operand (op, mode))
35964 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
35967 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
35969 op = force_reg (mode, op);
35973 args[i].mode = mode;
/* Generate the pattern with the operand count determined above.  */
35979 pat = GEN_FCN (icode) (target, args[0].op);
35984 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
35985 GEN_INT ((int)sub_code));
35986 else if (! comparison_p)
35987 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
35990 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
35994 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
35999 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
36003 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
36007 gcc_unreachable ();
36017 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
36018 insns with vec_merge. */
/* NOTE(review): lossy extraction — the `rtx target` parameter line,
   the op1 assignment (op1 = op0 in the upstream source — TODO confirm)
   and the final emit/return are missing from this view.  */
36021 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
36025 tree arg0 = CALL_EXPR_ARG (exp, 0);
36026 rtx op1, op0 = expand_normal (arg0);
36027 machine_mode tmode = insn_data[icode].operand[0].mode;
36028 machine_mode mode0 = insn_data[icode].operand[1].mode;
36030 if (optimize || !target
36031 || GET_MODE (target) != tmode
36032 || !insn_data[icode].operand[0].predicate (target, tmode))
36033 target = gen_reg_rtx (tmode);
36035 if (VECTOR_MODE_P (mode0))
36036 op0 = safe_vector_operand (op0, mode0);
36038 if ((optimize && !register_operand (op0, mode0))
36039 || !insn_data[icode].operand[1].predicate (op0, mode0))
36040 op0 = copy_to_mode_reg (mode0, op0)
36053 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expands an SSE compare builtin into a pattern carrying the comparison
   as an embedded rtx (op2); SWAP exchanges the operands for comparisons
   only available in the reversed form.  NOTE(review): lossy extraction —
   `rtx pat, op2;`, braces and the final emit/return are missing.  */
36056 ix86_expand_sse_compare (const struct builtin_description *d,
36057 tree exp, rtx target, bool swap)
36060 tree arg0 = CALL_EXPR_ARG (exp, 0);
36061 tree arg1 = CALL_EXPR_ARG (exp, 1);
36062 rtx op0 = expand_normal (arg0);
36063 rtx op1 = expand_normal (arg1);
36065 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36066 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36067 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36068 enum rtx_code comparison = d->comparison;
36070 if (VECTOR_MODE_P (mode0))
36071 op0 = safe_vector_operand (op0, mode0);
36072 if (VECTOR_MODE_P (mode1))
36073 op1 = safe_vector_operand (op1, mode1);
36075 /* Swap operands if we have a comparison that isn't available in
36078 std::swap (op0, op1);
36080 if (optimize || !target
36081 || GET_MODE (target) != tmode
36082 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36083 target = gen_reg_rtx (tmode);
36085 if ((optimize && !register_operand (op0, mode0))
36086 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
36087 op0 = copy_to_mode_reg (mode0, op0);
36088 if ((optimize && !register_operand (op1, mode1))
36089 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
36090 op1 = copy_to_mode_reg (mode1, op1);
36092 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
36093 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36100 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Emits a comi pattern, then materializes the resulting flag condition
   into the low QImode subreg of a zeroed SImode pseudo and returns the
   SImode register.  NOTE(review): lossy extraction — the target/pat
   declarations and emit_insn (pat) are among the missing lines.  */
36103 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
36107 tree arg0 = CALL_EXPR_ARG (exp, 0);
36108 tree arg1 = CALL_EXPR_ARG (exp, 1);
36109 rtx op0 = expand_normal (arg0);
36110 rtx op1 = expand_normal (arg1);
36111 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36112 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36113 enum rtx_code comparison = d->comparison;
36115 if (VECTOR_MODE_P (mode0))
36116 op0 = safe_vector_operand (op0, mode0);
36117 if (VECTOR_MODE_P (mode1))
36118 op1 = safe_vector_operand (op1, mode1);
36120 /* Swap operands if we have a comparison that isn't available in
36122 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
36123 std::swap (op0, op1);
/* Zero the full SImode result first, then write only the low byte via
   a strict_low_part set below.  */
36125 target = gen_reg_rtx (SImode);
36126 emit_move_insn (target, const0_rtx);
36127 target = gen_rtx_SUBREG (QImode, target, 0);
36129 if ((optimize && !register_operand (op0, mode0))
36130 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36131 op0 = copy_to_mode_reg (mode0, op0);
36132 if ((optimize && !register_operand (op1, mode1))
36133 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36134 op1 = copy_to_mode_reg (mode1, op1);
36136 pat = GEN_FCN (d->icode) (op0, op1);
36140 emit_insn (gen_rtx_SET (VOIDmode,
36141 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36142 gen_rtx_fmt_ee (comparison, QImode,
36146 return SUBREG_REG (target);
36149 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
/* Expands a round builtin; d->comparison is reused here as the rounding
   immediate (op1), not as a comparison code.  NOTE(review): lossy
   extraction — declarations, braces and the final emit/return missing.  */
36152 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
36156 tree arg0 = CALL_EXPR_ARG (exp, 0);
36157 rtx op1, op0 = expand_normal (arg0);
36158 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36159 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36161 if (optimize || target == 0
36162 || GET_MODE (target) != tmode
36163 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36164 target = gen_reg_rtx (tmode);
36166 if (VECTOR_MODE_P (mode0))
36167 op0 = safe_vector_operand (op0, mode0);
36169 if ((optimize && !register_operand (op0, mode0))
36170 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36171 op0 = copy_to_mode_reg (mode0, op0);
36173 op1 = GEN_INT (d->comparison);
36175 pat = GEN_FCN (d->icode) (target, op0, op1);
/* Expands a two-input round+pack builtin (e.g. V4SI <- V2DF,V2DF);
   as in ix86_expand_sse_round, d->comparison carries the rounding
   immediate.  NOTE(review): lossy extraction — the leading comment,
   declarations and the final emit/return are missing from this view.  */
36183 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
36184 tree exp, rtx target)
36187 tree arg0 = CALL_EXPR_ARG (exp, 0);
36188 tree arg1 = CALL_EXPR_ARG (exp, 1);
36189 rtx op0 = expand_normal (arg0);
36190 rtx op1 = expand_normal (arg1);
36192 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36193 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36194 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36196 if (optimize || target == 0
36197 || GET_MODE (target) != tmode
36198 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36199 target = gen_reg_rtx (tmode);
36201 op0 = safe_vector_operand (op0, mode0);
36202 op1 = safe_vector_operand (op1, mode1);
36204 if ((optimize && !register_operand (op0, mode0))
36205 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36206 op0 = copy_to_mode_reg (mode0, op0);
36207 if ((optimize && !register_operand (op1, mode1))
36208 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36209 op1 = copy_to_mode_reg (mode1, op1);
36211 op2 = GEN_INT (d->comparison);
36213 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36220 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Same result-materialization scheme as ix86_expand_sse_comi: emit the
   ptest pattern, then set the low byte of a zeroed SImode pseudo from
   the flag condition and return the SImode register.  */
36223 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
36227 tree arg0 = CALL_EXPR_ARG (exp, 0);
36228 tree arg1 = CALL_EXPR_ARG (exp, 1);
36229 rtx op0 = expand_normal (arg0);
36230 rtx op1 = expand_normal (arg1);
36231 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36232 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36233 enum rtx_code comparison = d->comparison;
36235 if (VECTOR_MODE_P (mode0))
36236 op0 = safe_vector_operand (op0, mode0);
36237 if (VECTOR_MODE_P (mode1))
36238 op1 = safe_vector_operand (op1, mode1);
36240 target = gen_reg_rtx (SImode);
36241 emit_move_insn (target, const0_rtx);
36242 target = gen_rtx_SUBREG (QImode, target, 0);
36244 if ((optimize && !register_operand (op0, mode0))
36245 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36246 op0 = copy_to_mode_reg (mode0, op0);
36247 if ((optimize && !register_operand (op1, mode1))
36248 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36249 op1 = copy_to_mode_reg (mode1, op1);
36251 pat = GEN_FCN (d->icode) (op0, op1);
36255 emit_insn (gen_rtx_SET (VOIDmode,
36256 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36257 gen_rtx_fmt_ee (comparison, QImode,
36261 return SUBREG_REG (target);
36264 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* The pcmpestr patterns produce two results (index and mask); depending
   on which builtin was called, one becomes the target and the other a
   scratch, or — for the flag-extracting variants — both are scratches
   and a flag bit (d->flag names the flags register mode) is returned.
   NOTE(review): lossy extraction — emit_insn (pat) and the failure
   returns after the error () call are among the missing lines.  */
36267 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36268 tree exp, rtx target)
36271 tree arg0 = CALL_EXPR_ARG (exp, 0);
36272 tree arg1 = CALL_EXPR_ARG (exp, 1);
36273 tree arg2 = CALL_EXPR_ARG (exp, 2);
36274 tree arg3 = CALL_EXPR_ARG (exp, 3);
36275 tree arg4 = CALL_EXPR_ARG (exp, 4);
36276 rtx scratch0, scratch1;
36277 rtx op0 = expand_normal (arg0);
36278 rtx op1 = expand_normal (arg1);
36279 rtx op2 = expand_normal (arg2);
36280 rtx op3 = expand_normal (arg3);
36281 rtx op4 = expand_normal (arg4);
36282 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
36284 tmode0 = insn_data[d->icode].operand[0].mode;
36285 tmode1 = insn_data[d->icode].operand[1].mode;
36286 modev2 = insn_data[d->icode].operand[2].mode;
36287 modei3 = insn_data[d->icode].operand[3].mode;
36288 modev4 = insn_data[d->icode].operand[4].mode;
36289 modei5 = insn_data[d->icode].operand[5].mode;
36290 modeimm = insn_data[d->icode].operand[6].mode;
36292 if (VECTOR_MODE_P (modev2))
36293 op0 = safe_vector_operand (op0, modev2);
36294 if (VECTOR_MODE_P (modev4))
36295 op2 = safe_vector_operand (op2, modev4);
36297 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36298 op0 = copy_to_mode_reg (modev2, op0);
36299 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36300 op1 = copy_to_mode_reg (modei3, op1);
36301 if ((optimize && !register_operand (op2, modev4))
36302 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36303 op2 = copy_to_mode_reg (modev4, op2);
36304 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36305 op3 = copy_to_mode_reg (modei5, op3);
36307 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36309 error ("the fifth argument must be an 8-bit immediate");
/* PCMPESTRI wants the index result; the mask output is a scratch.  */
36313 if (d->code == IX86_BUILTIN_PCMPESTRI128)
36315 if (optimize || !target
36316 || GET_MODE (target) != tmode0
36317 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36318 target = gen_reg_rtx (tmode0);
36320 scratch1 = gen_reg_rtx (tmode1);
36322 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
/* PCMPESTRM wants the mask result; the index output is a scratch.  */
36324 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36326 if (optimize || !target
36327 || GET_MODE (target) != tmode1
36328 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36329 target = gen_reg_rtx (tmode1);
36331 scratch0 = gen_reg_rtx (tmode0);
36333 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag-extracting variants: both outputs are scratches, return a bit
   of the flags register instead.  */
36337 gcc_assert (d->flag);
36339 scratch0 = gen_reg_rtx (tmode0);
36340 scratch1 = gen_reg_rtx (tmode1);
36342 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
36352 target = gen_reg_rtx (SImode);
36353 emit_move_insn (target, const0_rtx);
36354 target = gen_rtx_SUBREG (QImode, target, 0);
36357 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36358 gen_rtx_fmt_ee (EQ, QImode,
36359 gen_rtx_REG ((machine_mode) d->flag,
36362 return SUBREG_REG (target);
36369 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* Implicit-length sibling of ix86_expand_sse_pcmpestr: three call
   arguments (two vectors + immediate) instead of five, same dual-result
   / flag-extraction scheme.  NOTE(review): lossy extraction — the
   emit_insn (pat) calls and post-error returns are missing here.  */
36372 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36373 tree exp, rtx target)
36376 tree arg0 = CALL_EXPR_ARG (exp, 0);
36377 tree arg1 = CALL_EXPR_ARG (exp, 1);
36378 tree arg2 = CALL_EXPR_ARG (exp, 2);
36379 rtx scratch0, scratch1;
36380 rtx op0 = expand_normal (arg0);
36381 rtx op1 = expand_normal (arg1);
36382 rtx op2 = expand_normal (arg2);
36383 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
36385 tmode0 = insn_data[d->icode].operand[0].mode;
36386 tmode1 = insn_data[d->icode].operand[1].mode;
36387 modev2 = insn_data[d->icode].operand[2].mode;
36388 modev3 = insn_data[d->icode].operand[3].mode;
36389 modeimm = insn_data[d->icode].operand[4].mode;
36391 if (VECTOR_MODE_P (modev2))
36392 op0 = safe_vector_operand (op0, modev2);
36393 if (VECTOR_MODE_P (modev3))
36394 op1 = safe_vector_operand (op1, modev3);
36396 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36397 op0 = copy_to_mode_reg (modev2, op0);
36398 if ((optimize && !register_operand (op1, modev3))
36399 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36400 op1 = copy_to_mode_reg (modev3, op1);
36402 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36404 error ("the third argument must be an 8-bit immediate");
/* PCMPISTRI wants the index result; the mask output is a scratch.  */
36408 if (d->code == IX86_BUILTIN_PCMPISTRI128)
36410 if (optimize || !target
36411 || GET_MODE (target) != tmode0
36412 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36413 target = gen_reg_rtx (tmode0);
36415 scratch1 = gen_reg_rtx (tmode1);
36417 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
/* PCMPISTRM wants the mask result; the index output is a scratch.  */
36419 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36421 if (optimize || !target
36422 || GET_MODE (target) != tmode1
36423 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36424 target = gen_reg_rtx (tmode1);
36426 scratch0 = gen_reg_rtx (tmode0);
36428 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag-extracting variants: both outputs are scratches, the result is
   a condition bit taken from the flags register (d->flag).  */
36432 gcc_assert (d->flag);
36434 scratch0 = gen_reg_rtx (tmode0);
36435 scratch1 = gen_reg_rtx (tmode1);
36437 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
36447 target = gen_reg_rtx (SImode);
36448 emit_move_insn (target, const0_rtx);
36449 target = gen_rtx_SUBREG (QImode, target, 0);
36452 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36453 gen_rtx_fmt_ee (EQ, QImode,
36454 gen_rtx_REG ((machine_mode) d->flag,
36457 return SUBREG_REG (target);
36463 /* Subroutine of ix86_expand_builtin to take care of insns with
36464 variable number of operands. */
36467 ix86_expand_args_builtin (const struct builtin_description *d,
36468 tree exp, rtx target)
36470 rtx pat, real_target;
36471 unsigned int i, nargs;
36472 unsigned int nargs_constant = 0;
36473 unsigned int mask_pos = 0;
36474 int num_memory = 0;
36480 bool last_arg_count = false;
36481 enum insn_code icode = d->icode;
36482 const struct insn_data_d *insn_p = &insn_data[icode];
36483 machine_mode tmode = insn_p->operand[0].mode;
36484 machine_mode rmode = VOIDmode;
36486 enum rtx_code comparison = d->comparison;
/* Dispatch on the builtin's function type: each case group either
   delegates to a specialized expander or records how many arguments
   the insn takes, how many trailing arguments must be immediates
   (nargs_constant), and related flags.  */
36488 switch ((enum ix86_builtin_func_type) d->flag)
36490 case V2DF_FTYPE_V2DF_ROUND:
36491 case V4DF_FTYPE_V4DF_ROUND:
36492 case V4SF_FTYPE_V4SF_ROUND:
36493 case V8SF_FTYPE_V8SF_ROUND:
36494 case V4SI_FTYPE_V4SF_ROUND:
36495 case V8SI_FTYPE_V8SF_ROUND:
36496 return ix86_expand_sse_round (d, exp, target);
36497 case V4SI_FTYPE_V2DF_V2DF_ROUND:
36498 case V8SI_FTYPE_V4DF_V4DF_ROUND:
36499 case V16SI_FTYPE_V8DF_V8DF_ROUND:
36500 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
36501 case INT_FTYPE_V8SF_V8SF_PTEST:
36502 case INT_FTYPE_V4DI_V4DI_PTEST:
36503 case INT_FTYPE_V4DF_V4DF_PTEST:
36504 case INT_FTYPE_V4SF_V4SF_PTEST:
36505 case INT_FTYPE_V2DI_V2DI_PTEST:
36506 case INT_FTYPE_V2DF_V2DF_PTEST:
36507 return ix86_expand_sse_ptest (d, exp, target);
36508 case FLOAT128_FTYPE_FLOAT128:
36509 case FLOAT_FTYPE_FLOAT:
36510 case INT_FTYPE_INT:
36511 case UINT64_FTYPE_INT:
36512 case UINT16_FTYPE_UINT16:
36513 case INT64_FTYPE_INT64:
36514 case INT64_FTYPE_V4SF:
36515 case INT64_FTYPE_V2DF:
36516 case INT_FTYPE_V16QI:
36517 case INT_FTYPE_V8QI:
36518 case INT_FTYPE_V8SF:
36519 case INT_FTYPE_V4DF:
36520 case INT_FTYPE_V4SF:
36521 case INT_FTYPE_V2DF:
36522 case INT_FTYPE_V32QI:
36523 case V16QI_FTYPE_V16QI:
36524 case V8SI_FTYPE_V8SF:
36525 case V8SI_FTYPE_V4SI:
36526 case V8HI_FTYPE_V8HI:
36527 case V8HI_FTYPE_V16QI:
36528 case V8QI_FTYPE_V8QI:
36529 case V8SF_FTYPE_V8SF:
36530 case V8SF_FTYPE_V8SI:
36531 case V8SF_FTYPE_V4SF:
36532 case V8SF_FTYPE_V8HI:
36533 case V4SI_FTYPE_V4SI:
36534 case V4SI_FTYPE_V16QI:
36535 case V4SI_FTYPE_V4SF:
36536 case V4SI_FTYPE_V8SI:
36537 case V4SI_FTYPE_V8HI:
36538 case V4SI_FTYPE_V4DF:
36539 case V4SI_FTYPE_V2DF:
36540 case V4HI_FTYPE_V4HI:
36541 case V4DF_FTYPE_V4DF:
36542 case V4DF_FTYPE_V4SI:
36543 case V4DF_FTYPE_V4SF:
36544 case V4DF_FTYPE_V2DF:
36545 case V4SF_FTYPE_V4SF:
36546 case V4SF_FTYPE_V4SI:
36547 case V4SF_FTYPE_V8SF:
36548 case V4SF_FTYPE_V4DF:
36549 case V4SF_FTYPE_V8HI:
36550 case V4SF_FTYPE_V2DF:
36551 case V2DI_FTYPE_V2DI:
36552 case V2DI_FTYPE_V16QI:
36553 case V2DI_FTYPE_V8HI:
36554 case V2DI_FTYPE_V4SI:
36555 case V2DF_FTYPE_V2DF:
36556 case V2DF_FTYPE_V4SI:
36557 case V2DF_FTYPE_V4DF:
36558 case V2DF_FTYPE_V4SF:
36559 case V2DF_FTYPE_V2SI:
36560 case V2SI_FTYPE_V2SI:
36561 case V2SI_FTYPE_V4SF:
36562 case V2SI_FTYPE_V2SF:
36563 case V2SI_FTYPE_V2DF:
36564 case V2SF_FTYPE_V2SF:
36565 case V2SF_FTYPE_V2SI:
36566 case V32QI_FTYPE_V32QI:
36567 case V32QI_FTYPE_V16QI:
36568 case V16HI_FTYPE_V16HI:
36569 case V16HI_FTYPE_V8HI:
36570 case V8SI_FTYPE_V8SI:
36571 case V16HI_FTYPE_V16QI:
36572 case V8SI_FTYPE_V16QI:
36573 case V4DI_FTYPE_V16QI:
36574 case V8SI_FTYPE_V8HI:
36575 case V4DI_FTYPE_V8HI:
36576 case V4DI_FTYPE_V4SI:
36577 case V4DI_FTYPE_V2DI:
36579 case HI_FTYPE_V16QI:
36580 case SI_FTYPE_V32QI:
36581 case DI_FTYPE_V64QI:
36582 case V16QI_FTYPE_HI:
36583 case V32QI_FTYPE_SI:
36584 case V64QI_FTYPE_DI:
36585 case V8HI_FTYPE_QI:
36586 case V16HI_FTYPE_HI:
36587 case V32HI_FTYPE_SI:
36588 case V4SI_FTYPE_QI:
36589 case V8SI_FTYPE_QI:
36590 case V4SI_FTYPE_HI:
36591 case V8SI_FTYPE_HI:
36592 case QI_FTYPE_V8HI:
36593 case HI_FTYPE_V16HI:
36594 case SI_FTYPE_V32HI:
36595 case QI_FTYPE_V4SI:
36596 case QI_FTYPE_V8SI:
36597 case HI_FTYPE_V16SI:
36598 case QI_FTYPE_V2DI:
36599 case QI_FTYPE_V4DI:
36600 case QI_FTYPE_V8DI:
36601 case UINT_FTYPE_V2DF:
36602 case UINT_FTYPE_V4SF:
36603 case UINT64_FTYPE_V2DF:
36604 case UINT64_FTYPE_V4SF:
36605 case V16QI_FTYPE_V8DI:
36606 case V16HI_FTYPE_V16SI:
36607 case V16SI_FTYPE_HI:
36608 case V2DI_FTYPE_QI:
36609 case V4DI_FTYPE_QI:
36610 case V16SI_FTYPE_V16SI:
36611 case V16SI_FTYPE_INT:
36612 case V16SF_FTYPE_FLOAT:
36613 case V16SF_FTYPE_V8SF:
36614 case V16SI_FTYPE_V8SI:
36615 case V16SF_FTYPE_V4SF:
36616 case V16SI_FTYPE_V4SI:
36617 case V16SF_FTYPE_V16SF:
36618 case V8HI_FTYPE_V8DI:
36619 case V8UHI_FTYPE_V8UHI:
36620 case V8SI_FTYPE_V8DI:
36621 case V8SF_FTYPE_V8DF:
36622 case V8DI_FTYPE_QI:
36623 case V8DI_FTYPE_INT64:
36624 case V8DI_FTYPE_V4DI:
36625 case V8DI_FTYPE_V8DI:
36626 case V8DF_FTYPE_DOUBLE:
36627 case V8DF_FTYPE_V4DF:
36628 case V8DF_FTYPE_V2DF:
36629 case V8DF_FTYPE_V8DF:
36630 case V8DF_FTYPE_V8SI:
36633 case V4SF_FTYPE_V4SF_VEC_MERGE:
36634 case V2DF_FTYPE_V2DF_VEC_MERGE:
36635 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
36636 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
36637 case V16QI_FTYPE_V16QI_V16QI:
36638 case V16QI_FTYPE_V8HI_V8HI:
36639 case V16SI_FTYPE_V16SI_V16SI:
36640 case V16SF_FTYPE_V16SF_V16SF:
36641 case V16SF_FTYPE_V16SF_V16SI:
36642 case V8QI_FTYPE_V8QI_V8QI:
36643 case V8QI_FTYPE_V4HI_V4HI:
36644 case V8HI_FTYPE_V8HI_V8HI:
36645 case V8HI_FTYPE_V16QI_V16QI:
36646 case V8HI_FTYPE_V4SI_V4SI:
36647 case V8SF_FTYPE_V8SF_V8SF:
36648 case V8SF_FTYPE_V8SF_V8SI:
36649 case V8DI_FTYPE_V8DI_V8DI:
36650 case V8DF_FTYPE_V8DF_V8DF:
36651 case V8DF_FTYPE_V8DF_V8DI:
36652 case V4SI_FTYPE_V4SI_V4SI:
36653 case V4SI_FTYPE_V8HI_V8HI:
36654 case V4SI_FTYPE_V4SF_V4SF:
36655 case V4SI_FTYPE_V2DF_V2DF:
36656 case V4HI_FTYPE_V4HI_V4HI:
36657 case V4HI_FTYPE_V8QI_V8QI:
36658 case V4HI_FTYPE_V2SI_V2SI:
36659 case V4DF_FTYPE_V4DF_V4DF:
36660 case V4DF_FTYPE_V4DF_V4DI:
36661 case V4SF_FTYPE_V4SF_V4SF:
36662 case V4SF_FTYPE_V4SF_V4SI:
36663 case V4SF_FTYPE_V4SF_V2SI:
36664 case V4SF_FTYPE_V4SF_V2DF:
36665 case V4SF_FTYPE_V4SF_UINT:
36666 case V4SF_FTYPE_V4SF_UINT64:
36667 case V4SF_FTYPE_V4SF_DI:
36668 case V4SF_FTYPE_V4SF_SI:
36669 case V2DI_FTYPE_V2DI_V2DI:
36670 case V2DI_FTYPE_V16QI_V16QI:
36671 case V2DI_FTYPE_V4SI_V4SI:
36672 case V2UDI_FTYPE_V4USI_V4USI:
36673 case V2DI_FTYPE_V2DI_V16QI:
36674 case V2DI_FTYPE_V2DF_V2DF:
36675 case V2SI_FTYPE_V2SI_V2SI:
36676 case V2SI_FTYPE_V4HI_V4HI:
36677 case V2SI_FTYPE_V2SF_V2SF:
36678 case V2DF_FTYPE_V2DF_V2DF:
36679 case V2DF_FTYPE_V2DF_V4SF:
36680 case V2DF_FTYPE_V2DF_V2DI:
36681 case V2DF_FTYPE_V2DF_DI:
36682 case V2DF_FTYPE_V2DF_SI:
36683 case V2DF_FTYPE_V2DF_UINT:
36684 case V2DF_FTYPE_V2DF_UINT64:
36685 case V2SF_FTYPE_V2SF_V2SF:
36686 case V1DI_FTYPE_V1DI_V1DI:
36687 case V1DI_FTYPE_V8QI_V8QI:
36688 case V1DI_FTYPE_V2SI_V2SI:
36689 case V32QI_FTYPE_V16HI_V16HI:
36690 case V16HI_FTYPE_V8SI_V8SI:
36691 case V32QI_FTYPE_V32QI_V32QI:
36692 case V16HI_FTYPE_V32QI_V32QI:
36693 case V16HI_FTYPE_V16HI_V16HI:
36694 case V8SI_FTYPE_V4DF_V4DF:
36695 case V8SI_FTYPE_V8SI_V8SI:
36696 case V8SI_FTYPE_V16HI_V16HI:
36697 case V4DI_FTYPE_V4DI_V4DI:
36698 case V4DI_FTYPE_V8SI_V8SI:
36699 case V4UDI_FTYPE_V8USI_V8USI:
36700 case QI_FTYPE_V8DI_V8DI:
36701 case V8DI_FTYPE_V64QI_V64QI:
36702 case HI_FTYPE_V16SI_V16SI:
/* Plain two-operand insns go through the generic binop expander
   unless a comparison code was recorded for them.  */
36703 if (comparison == UNKNOWN)
36704 return ix86_expand_binop_builtin (icode, exp, target);
36707 case V4SF_FTYPE_V4SF_V4SF_SWAP:
36708 case V2DF_FTYPE_V2DF_V2DF_SWAP:
36709 gcc_assert (comparison != UNKNOWN);
/* COUNT variants: the last argument is a shift count, handled
   specially in the operand loop below.  */
36713 case V16HI_FTYPE_V16HI_V8HI_COUNT:
36714 case V16HI_FTYPE_V16HI_SI_COUNT:
36715 case V8SI_FTYPE_V8SI_V4SI_COUNT:
36716 case V8SI_FTYPE_V8SI_SI_COUNT:
36717 case V4DI_FTYPE_V4DI_V2DI_COUNT:
36718 case V4DI_FTYPE_V4DI_INT_COUNT:
36719 case V8HI_FTYPE_V8HI_V8HI_COUNT:
36720 case V8HI_FTYPE_V8HI_SI_COUNT:
36721 case V4SI_FTYPE_V4SI_V4SI_COUNT:
36722 case V4SI_FTYPE_V4SI_SI_COUNT:
36723 case V4HI_FTYPE_V4HI_V4HI_COUNT:
36724 case V4HI_FTYPE_V4HI_SI_COUNT:
36725 case V2DI_FTYPE_V2DI_V2DI_COUNT:
36726 case V2DI_FTYPE_V2DI_SI_COUNT:
36727 case V2SI_FTYPE_V2SI_V2SI_COUNT:
36728 case V2SI_FTYPE_V2SI_SI_COUNT:
36729 case V1DI_FTYPE_V1DI_V1DI_COUNT:
36730 case V1DI_FTYPE_V1DI_SI_COUNT:
36732 last_arg_count = true;
36734 case UINT64_FTYPE_UINT64_UINT64:
36735 case UINT_FTYPE_UINT_UINT:
36736 case UINT_FTYPE_UINT_USHORT:
36737 case UINT_FTYPE_UINT_UCHAR:
36738 case UINT16_FTYPE_UINT16_INT:
36739 case UINT8_FTYPE_UINT8_INT:
36740 case HI_FTYPE_HI_HI:
36741 case SI_FTYPE_SI_SI:
36742 case DI_FTYPE_DI_DI:
36743 case V16SI_FTYPE_V8DF_V8DF:
/* CONVERT variants: the insn's result mode differs from the
   builtin's return mode; rmode records the subreg mode used.  */
36746 case V2DI_FTYPE_V2DI_INT_CONVERT:
36749 nargs_constant = 1;
36751 case V4DI_FTYPE_V4DI_INT_CONVERT:
36754 nargs_constant = 1;
36756 case V8DI_FTYPE_V8DI_INT_CONVERT:
36759 nargs_constant = 1;
36761 case V8HI_FTYPE_V8HI_INT:
36762 case V8HI_FTYPE_V8SF_INT:
36763 case V16HI_FTYPE_V16SF_INT:
36764 case V8HI_FTYPE_V4SF_INT:
36765 case V8SF_FTYPE_V8SF_INT:
36766 case V4SF_FTYPE_V16SF_INT:
36767 case V16SF_FTYPE_V16SF_INT:
36768 case V4SI_FTYPE_V4SI_INT:
36769 case V4SI_FTYPE_V8SI_INT:
36770 case V4HI_FTYPE_V4HI_INT:
36771 case V4DF_FTYPE_V4DF_INT:
36772 case V4DF_FTYPE_V8DF_INT:
36773 case V4SF_FTYPE_V4SF_INT:
36774 case V4SF_FTYPE_V8SF_INT:
36775 case V2DI_FTYPE_V2DI_INT:
36776 case V2DF_FTYPE_V2DF_INT:
36777 case V2DF_FTYPE_V4DF_INT:
36778 case V16HI_FTYPE_V16HI_INT:
36779 case V8SI_FTYPE_V8SI_INT:
36780 case V16SI_FTYPE_V16SI_INT:
36781 case V4SI_FTYPE_V16SI_INT:
36782 case V4DI_FTYPE_V4DI_INT:
36783 case V2DI_FTYPE_V4DI_INT:
36784 case V4DI_FTYPE_V8DI_INT:
36785 case HI_FTYPE_HI_INT:
36786 case QI_FTYPE_V4SF_INT:
36787 case QI_FTYPE_V2DF_INT:
36789 nargs_constant = 1;
36791 case V16QI_FTYPE_V16QI_V16QI_V16QI:
36792 case V8SF_FTYPE_V8SF_V8SF_V8SF:
36793 case V4DF_FTYPE_V4DF_V4DF_V4DF:
36794 case V4SF_FTYPE_V4SF_V4SF_V4SF:
36795 case V2DF_FTYPE_V2DF_V2DF_V2DF:
36796 case V32QI_FTYPE_V32QI_V32QI_V32QI:
36797 case HI_FTYPE_V16SI_V16SI_HI:
36798 case QI_FTYPE_V8DI_V8DI_QI:
36799 case V16HI_FTYPE_V16SI_V16HI_HI:
36800 case V16QI_FTYPE_V16SI_V16QI_HI:
36801 case V16QI_FTYPE_V8DI_V16QI_QI:
36802 case V16SF_FTYPE_V16SF_V16SF_HI:
36803 case V16SF_FTYPE_V16SF_V16SF_V16SF:
36804 case V16SF_FTYPE_V16SF_V16SI_V16SF:
36805 case V16SF_FTYPE_V16SI_V16SF_HI:
36806 case V16SF_FTYPE_V16SI_V16SF_V16SF:
36807 case V16SF_FTYPE_V4SF_V16SF_HI:
36808 case V16SI_FTYPE_SI_V16SI_HI:
36809 case V16SI_FTYPE_V16HI_V16SI_HI:
36810 case V16SI_FTYPE_V16QI_V16SI_HI:
36811 case V16SI_FTYPE_V16SF_V16SI_HI:
36812 case V8SF_FTYPE_V4SF_V8SF_QI:
36813 case V4DF_FTYPE_V2DF_V4DF_QI:
36814 case V8SI_FTYPE_V4SI_V8SI_QI:
36815 case V8SI_FTYPE_SI_V8SI_QI:
36816 case V4SI_FTYPE_V4SI_V4SI_QI:
36817 case V4SI_FTYPE_SI_V4SI_QI:
36818 case V4DI_FTYPE_V2DI_V4DI_QI:
36819 case V4DI_FTYPE_DI_V4DI_QI:
36820 case V2DI_FTYPE_V2DI_V2DI_QI:
36821 case V2DI_FTYPE_DI_V2DI_QI:
36822 case V64QI_FTYPE_V64QI_V64QI_DI:
36823 case V64QI_FTYPE_V16QI_V64QI_DI:
36824 case V64QI_FTYPE_QI_V64QI_DI:
36825 case V32QI_FTYPE_V32QI_V32QI_SI:
36826 case V32QI_FTYPE_V16QI_V32QI_SI:
36827 case V32QI_FTYPE_QI_V32QI_SI:
36828 case V16QI_FTYPE_V16QI_V16QI_HI:
36829 case V16QI_FTYPE_QI_V16QI_HI:
36830 case V32HI_FTYPE_V8HI_V32HI_SI:
36831 case V32HI_FTYPE_HI_V32HI_SI:
36832 case V16HI_FTYPE_V8HI_V16HI_HI:
36833 case V16HI_FTYPE_HI_V16HI_HI:
36834 case V8HI_FTYPE_V8HI_V8HI_QI:
36835 case V8HI_FTYPE_HI_V8HI_QI:
36836 case V8SF_FTYPE_V8HI_V8SF_QI:
36837 case V4SF_FTYPE_V8HI_V4SF_QI:
36838 case V8SI_FTYPE_V8SF_V8SI_QI:
36839 case V4SI_FTYPE_V4SF_V4SI_QI:
36840 case V8DI_FTYPE_V8SF_V8DI_QI:
36841 case V4DI_FTYPE_V4SF_V4DI_QI:
36842 case V2DI_FTYPE_V4SF_V2DI_QI:
36843 case V8SF_FTYPE_V8DI_V8SF_QI:
36844 case V4SF_FTYPE_V4DI_V4SF_QI:
36845 case V4SF_FTYPE_V2DI_V4SF_QI:
36846 case V8DF_FTYPE_V8DI_V8DF_QI:
36847 case V4DF_FTYPE_V4DI_V4DF_QI:
36848 case V2DF_FTYPE_V2DI_V2DF_QI:
36849 case V16QI_FTYPE_V8HI_V16QI_QI:
36850 case V16QI_FTYPE_V16HI_V16QI_HI:
36851 case V16QI_FTYPE_V4SI_V16QI_QI:
36852 case V16QI_FTYPE_V8SI_V16QI_QI:
36853 case V8HI_FTYPE_V4SI_V8HI_QI:
36854 case V8HI_FTYPE_V8SI_V8HI_QI:
36855 case V16QI_FTYPE_V2DI_V16QI_QI:
36856 case V16QI_FTYPE_V4DI_V16QI_QI:
36857 case V8HI_FTYPE_V2DI_V8HI_QI:
36858 case V8HI_FTYPE_V4DI_V8HI_QI:
36859 case V4SI_FTYPE_V2DI_V4SI_QI:
36860 case V4SI_FTYPE_V4DI_V4SI_QI:
36861 case V32QI_FTYPE_V32HI_V32QI_SI:
36862 case HI_FTYPE_V16QI_V16QI_HI:
36863 case SI_FTYPE_V32QI_V32QI_SI:
36864 case DI_FTYPE_V64QI_V64QI_DI:
36865 case QI_FTYPE_V8HI_V8HI_QI:
36866 case HI_FTYPE_V16HI_V16HI_HI:
36867 case SI_FTYPE_V32HI_V32HI_SI:
36868 case QI_FTYPE_V4SI_V4SI_QI:
36869 case QI_FTYPE_V8SI_V8SI_QI:
36870 case QI_FTYPE_V2DI_V2DI_QI:
36871 case QI_FTYPE_V4DI_V4DI_QI:
36872 case V4SF_FTYPE_V2DF_V4SF_QI:
36873 case V4SF_FTYPE_V4DF_V4SF_QI:
36874 case V16SI_FTYPE_V16SI_V16SI_HI:
36875 case V16SI_FTYPE_V16SI_V16SI_V16SI:
36876 case V16SI_FTYPE_V4SI_V16SI_HI:
36877 case V2DI_FTYPE_V2DI_V2DI_V2DI:
36878 case V2DI_FTYPE_V4SI_V2DI_QI:
36879 case V2DI_FTYPE_V8HI_V2DI_QI:
36880 case V2DI_FTYPE_V16QI_V2DI_QI:
36881 case V4DI_FTYPE_V4DI_V4DI_QI:
36882 case V4DI_FTYPE_V4SI_V4DI_QI:
36883 case V4DI_FTYPE_V8HI_V4DI_QI:
36884 case V4DI_FTYPE_V16QI_V4DI_QI:
36885 case V8DI_FTYPE_V8DF_V8DI_QI:
36886 case V4DI_FTYPE_V4DF_V4DI_QI:
36887 case V2DI_FTYPE_V2DF_V2DI_QI:
36888 case V4SI_FTYPE_V4DF_V4SI_QI:
36889 case V4SI_FTYPE_V2DF_V4SI_QI:
36890 case V4SI_FTYPE_V8HI_V4SI_QI:
36891 case V4SI_FTYPE_V16QI_V4SI_QI:
36892 case V8SI_FTYPE_V8SI_V8SI_V8SI:
36893 case V4DI_FTYPE_V4DI_V4DI_V4DI:
36894 case V8DF_FTYPE_V2DF_V8DF_QI:
36895 case V8DF_FTYPE_V4DF_V8DF_QI:
36896 case V8DF_FTYPE_V8DF_V8DF_QI:
36897 case V8DF_FTYPE_V8DF_V8DF_V8DF:
36898 case V8SF_FTYPE_V8SF_V8SF_QI:
36899 case V8SF_FTYPE_V8SI_V8SF_QI:
36900 case V4DF_FTYPE_V4DF_V4DF_QI:
36901 case V4SF_FTYPE_V4SF_V4SF_QI:
36902 case V2DF_FTYPE_V2DF_V2DF_QI:
36903 case V2DF_FTYPE_V4SF_V2DF_QI:
36904 case V2DF_FTYPE_V4SI_V2DF_QI:
36905 case V4SF_FTYPE_V4SI_V4SF_QI:
36906 case V4DF_FTYPE_V4SF_V4DF_QI:
36907 case V4DF_FTYPE_V4SI_V4DF_QI:
36908 case V8SI_FTYPE_V8SI_V8SI_QI:
36909 case V8SI_FTYPE_V8HI_V8SI_QI:
36910 case V8SI_FTYPE_V16QI_V8SI_QI:
36911 case V8DF_FTYPE_V8DF_V8DI_V8DF:
36912 case V8DF_FTYPE_V8DI_V8DF_V8DF:
36913 case V8DF_FTYPE_V8SF_V8DF_QI:
36914 case V8DF_FTYPE_V8SI_V8DF_QI:
36915 case V8DI_FTYPE_DI_V8DI_QI:
36916 case V16SF_FTYPE_V8SF_V16SF_HI:
36917 case V16SI_FTYPE_V8SI_V16SI_HI:
36918 case V16HI_FTYPE_V16HI_V16HI_HI:
36919 case V8HI_FTYPE_V16QI_V8HI_QI:
36920 case V16HI_FTYPE_V16QI_V16HI_HI:
36921 case V32HI_FTYPE_V32HI_V32HI_SI:
36922 case V32HI_FTYPE_V32QI_V32HI_SI:
36923 case V8DI_FTYPE_V16QI_V8DI_QI:
36924 case V8DI_FTYPE_V2DI_V8DI_QI:
36925 case V8DI_FTYPE_V4DI_V8DI_QI:
36926 case V8DI_FTYPE_V8DI_V8DI_QI:
36927 case V8DI_FTYPE_V8DI_V8DI_V8DI:
36928 case V8DI_FTYPE_V8HI_V8DI_QI:
36929 case V8DI_FTYPE_V8SI_V8DI_QI:
36930 case V8HI_FTYPE_V8DI_V8HI_QI:
36931 case V8SF_FTYPE_V8DF_V8SF_QI:
36932 case V8SI_FTYPE_V8DF_V8SI_QI:
36933 case V8SI_FTYPE_V8DI_V8SI_QI:
36934 case V4SI_FTYPE_V4SI_V4SI_V4SI:
36937 case V32QI_FTYPE_V32QI_V32QI_INT:
36938 case V16HI_FTYPE_V16HI_V16HI_INT:
36939 case V16QI_FTYPE_V16QI_V16QI_INT:
36940 case V4DI_FTYPE_V4DI_V4DI_INT:
36941 case V8HI_FTYPE_V8HI_V8HI_INT:
36942 case V8SI_FTYPE_V8SI_V8SI_INT:
36943 case V8SI_FTYPE_V8SI_V4SI_INT:
36944 case V8SF_FTYPE_V8SF_V8SF_INT:
36945 case V8SF_FTYPE_V8SF_V4SF_INT:
36946 case V4SI_FTYPE_V4SI_V4SI_INT:
36947 case V4DF_FTYPE_V4DF_V4DF_INT:
36948 case V16SF_FTYPE_V16SF_V16SF_INT:
36949 case V16SF_FTYPE_V16SF_V4SF_INT:
36950 case V16SI_FTYPE_V16SI_V4SI_INT:
36951 case V4DF_FTYPE_V4DF_V2DF_INT:
36952 case V4SF_FTYPE_V4SF_V4SF_INT:
36953 case V2DI_FTYPE_V2DI_V2DI_INT:
36954 case V4DI_FTYPE_V4DI_V2DI_INT:
36955 case V2DF_FTYPE_V2DF_V2DF_INT:
36956 case QI_FTYPE_V8DI_V8DI_INT:
36957 case QI_FTYPE_V8DF_V8DF_INT:
36958 case QI_FTYPE_V2DF_V2DF_INT:
36959 case QI_FTYPE_V4SF_V4SF_INT:
36960 case HI_FTYPE_V16SI_V16SI_INT:
36961 case HI_FTYPE_V16SF_V16SF_INT:
36963 nargs_constant = 1;
36965 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
36968 nargs_constant = 1;
36970 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
36973 nargs_constant = 1;
36975 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
36978 nargs_constant = 1;
36980 case V2DI_FTYPE_V2DI_UINT_UINT:
36982 nargs_constant = 2;
36984 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
36987 nargs_constant = 1;
36989 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
36993 nargs_constant = 1;
36995 case QI_FTYPE_V8DF_INT_QI:
36996 case QI_FTYPE_V4DF_INT_QI:
36997 case QI_FTYPE_V2DF_INT_QI:
36998 case HI_FTYPE_V16SF_INT_HI:
36999 case QI_FTYPE_V8SF_INT_QI:
37000 case QI_FTYPE_V4SF_INT_QI:
37003 nargs_constant = 1;
37005 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
37009 nargs_constant = 1;
37011 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
37015 nargs_constant = 1;
37017 case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
37018 case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
37019 case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
37020 case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
37021 case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
37022 case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
37023 case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
37024 case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
37025 case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
37026 case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
37027 case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
37028 case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
37029 case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
37030 case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
37031 case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
37032 case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
37033 case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
37034 case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
37035 case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
37036 case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
37037 case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
37038 case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
37039 case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
37040 case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
37041 case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
37042 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
37043 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
37044 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
37045 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
37046 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
37047 case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
37048 case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
37049 case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
37050 case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
37051 case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
37052 case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
37053 case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
37054 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
37055 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
37056 case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
37057 case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
37058 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
37059 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
37060 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
37061 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
37062 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
37063 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
37064 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
37065 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
37066 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
37067 case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
37068 case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
37069 case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
37070 case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
37073 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
37074 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
37075 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
37076 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
37077 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
37079 nargs_constant = 1;
37081 case QI_FTYPE_V4DI_V4DI_INT_QI:
37082 case QI_FTYPE_V8SI_V8SI_INT_QI:
37083 case QI_FTYPE_V4DF_V4DF_INT_QI:
37084 case QI_FTYPE_V8SF_V8SF_INT_QI:
37085 case QI_FTYPE_V2DI_V2DI_INT_QI:
37086 case QI_FTYPE_V4SI_V4SI_INT_QI:
37087 case QI_FTYPE_V2DF_V2DF_INT_QI:
37088 case QI_FTYPE_V4SF_V4SF_INT_QI:
37089 case DI_FTYPE_V64QI_V64QI_INT_DI:
37090 case SI_FTYPE_V32QI_V32QI_INT_SI:
37091 case HI_FTYPE_V16QI_V16QI_INT_HI:
37092 case SI_FTYPE_V32HI_V32HI_INT_SI:
37093 case HI_FTYPE_V16HI_V16HI_INT_HI:
37094 case QI_FTYPE_V8HI_V8HI_INT_QI:
37097 nargs_constant = 1;
37099 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
37101 nargs_constant = 2;
37103 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
37104 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
37107 case QI_FTYPE_V8DI_V8DI_INT_QI:
37108 case HI_FTYPE_V16SI_V16SI_INT_HI:
37109 case QI_FTYPE_V8DF_V8DF_INT_QI:
37110 case HI_FTYPE_V16SF_V16SF_INT_HI:
37113 nargs_constant = 1;
37115 case V8SF_FTYPE_V8SF_INT_V8SF_QI:
37116 case V4SF_FTYPE_V4SF_INT_V4SF_QI:
37117 case V2DF_FTYPE_V4DF_INT_V2DF_QI:
37118 case V2DI_FTYPE_V4DI_INT_V2DI_QI:
37119 case V8SF_FTYPE_V16SF_INT_V8SF_QI:
37120 case V8SI_FTYPE_V16SI_INT_V8SI_QI:
37121 case V2DF_FTYPE_V8DF_INT_V2DF_QI:
37122 case V2DI_FTYPE_V8DI_INT_V2DI_QI:
37123 case V4SF_FTYPE_V8SF_INT_V4SF_QI:
37124 case V4SI_FTYPE_V8SI_INT_V4SI_QI:
37125 case V8HI_FTYPE_V8SF_INT_V8HI_QI:
37126 case V8HI_FTYPE_V4SF_INT_V8HI_QI:
37127 case V32HI_FTYPE_V32HI_INT_V32HI_SI:
37128 case V16HI_FTYPE_V16HI_INT_V16HI_HI:
37129 case V8HI_FTYPE_V8HI_INT_V8HI_QI:
37130 case V4DI_FTYPE_V4DI_INT_V4DI_QI:
37131 case V2DI_FTYPE_V2DI_INT_V2DI_QI:
37132 case V8SI_FTYPE_V8SI_INT_V8SI_QI:
37133 case V4SI_FTYPE_V4SI_INT_V4SI_QI:
37134 case V4DF_FTYPE_V4DF_INT_V4DF_QI:
37135 case V2DF_FTYPE_V2DF_INT_V2DF_QI:
37136 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
37137 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
37138 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
37139 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
37140 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
37141 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
37142 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
37143 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
37144 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
37147 nargs_constant = 1;
37149 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
37150 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
37151 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
37152 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
37153 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
37154 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
37155 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
37156 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
37157 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
37158 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
37159 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
37160 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
37161 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
37162 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
37163 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
37164 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
37165 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
37166 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
37167 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
37168 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
37169 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
37170 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
37171 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
37172 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
37173 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
37174 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
37175 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
37178 nargs_constant = 1;
37180 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
37181 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
37182 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
37183 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
37184 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
37185 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
37186 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
37187 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
37188 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
37189 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
37190 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
37194 nargs_constant = 1;
37198 gcc_unreachable ();
37201 gcc_assert (nargs <= ARRAY_SIZE (args));
/* Comparison builtins are expanded by the dedicated SSE-compare
   helper; everything else falls through to the generic path.  */
37203 if (comparison != UNKNOWN)
37205 gcc_assert (nargs == 2);
37206 return ix86_expand_sse_compare (d, exp, target, swap);
/* When the insn's result mode equals the builtin's return mode,
   expand directly into TARGET; otherwise expand into a fresh
   register and return a subreg of it in the builtin's mode.  */
37209 if (rmode == VOIDmode || rmode == tmode)
37213 || GET_MODE (target) != tmode
37214 || !insn_p->operand[0].predicate (target, tmode))
37215 target = gen_reg_rtx (tmode);
37216 real_target = target;
37220 real_target = gen_reg_rtx (tmode);
37221 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
/* Expand each call argument and massage it to fit the corresponding
   insn operand (operand 0 is the output, so inputs start at 1).  */
37224 for (i = 0; i < nargs; i++)
37226 tree arg = CALL_EXPR_ARG (exp, i);
37227 rtx op = expand_normal (arg);
37228 machine_mode mode = insn_p->operand[i + 1].mode;
37229 bool match = insn_p->operand[i + 1].predicate (op, mode);
37231 if (last_arg_count && (i + 1) == nargs)
37233 /* SIMD shift insns take either an 8-bit immediate or
37234 register as count. But builtin functions take int as
37235 count. If count doesn't match, we put it in register. */
37238 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
37239 if (!insn_p->operand[i + 1].predicate (op, mode))
37240 op = copy_to_reg (op);
/* Arguments in the nargs_constant tail must be immediates; emit a
   width-specific diagnostic when they are not.  */
37243 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37244 (!mask_pos && (nargs - i) <= nargs_constant))
37249 case CODE_FOR_avx_vinsertf128v4di:
37250 case CODE_FOR_avx_vextractf128v4di:
37251 error ("the last argument must be an 1-bit immediate");
37254 case CODE_FOR_avx512f_cmpv8di3_mask:
37255 case CODE_FOR_avx512f_cmpv16si3_mask:
37256 case CODE_FOR_avx512f_ucmpv8di3_mask:
37257 case CODE_FOR_avx512f_ucmpv16si3_mask:
37258 case CODE_FOR_avx512vl_cmpv4di3_mask:
37259 case CODE_FOR_avx512vl_cmpv8si3_mask:
37260 case CODE_FOR_avx512vl_ucmpv4di3_mask:
37261 case CODE_FOR_avx512vl_ucmpv8si3_mask:
37262 case CODE_FOR_avx512vl_cmpv2di3_mask:
37263 case CODE_FOR_avx512vl_cmpv4si3_mask:
37264 case CODE_FOR_avx512vl_ucmpv2di3_mask:
37265 case CODE_FOR_avx512vl_ucmpv4si3_mask:
37266 error ("the last argument must be a 3-bit immediate");
37269 case CODE_FOR_sse4_1_roundsd:
37270 case CODE_FOR_sse4_1_roundss:
37272 case CODE_FOR_sse4_1_roundpd:
37273 case CODE_FOR_sse4_1_roundps:
37274 case CODE_FOR_avx_roundpd256:
37275 case CODE_FOR_avx_roundps256:
37277 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
37278 case CODE_FOR_sse4_1_roundps_sfix:
37279 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
37280 case CODE_FOR_avx_roundps_sfix256:
37282 case CODE_FOR_sse4_1_blendps:
37283 case CODE_FOR_avx_blendpd256:
37284 case CODE_FOR_avx_vpermilv4df:
37285 case CODE_FOR_avx_vpermilv4df_mask:
37286 case CODE_FOR_avx512f_getmantv8df_mask:
37287 case CODE_FOR_avx512f_getmantv16sf_mask:
37288 case CODE_FOR_avx512vl_getmantv8sf_mask:
37289 case CODE_FOR_avx512vl_getmantv4df_mask:
37290 case CODE_FOR_avx512vl_getmantv4sf_mask:
37291 case CODE_FOR_avx512vl_getmantv2df_mask:
37292 case CODE_FOR_avx512dq_rangepv8df_mask_round:
37293 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
37294 case CODE_FOR_avx512dq_rangepv4df_mask:
37295 case CODE_FOR_avx512dq_rangepv8sf_mask:
37296 case CODE_FOR_avx512dq_rangepv2df_mask:
37297 case CODE_FOR_avx512dq_rangepv4sf_mask:
37298 case CODE_FOR_avx_shufpd256_mask:
37299 error ("the last argument must be a 4-bit immediate");
37302 case CODE_FOR_sha1rnds4:
37303 case CODE_FOR_sse4_1_blendpd:
37304 case CODE_FOR_avx_vpermilv2df:
37305 case CODE_FOR_avx_vpermilv2df_mask:
37306 case CODE_FOR_xop_vpermil2v2df3:
37307 case CODE_FOR_xop_vpermil2v4sf3:
37308 case CODE_FOR_xop_vpermil2v4df3:
37309 case CODE_FOR_xop_vpermil2v8sf3:
37310 case CODE_FOR_avx512f_vinsertf32x4_mask:
37311 case CODE_FOR_avx512f_vinserti32x4_mask:
37312 case CODE_FOR_avx512f_vextractf32x4_mask:
37313 case CODE_FOR_avx512f_vextracti32x4_mask:
37314 case CODE_FOR_sse2_shufpd:
37315 case CODE_FOR_sse2_shufpd_mask:
37316 case CODE_FOR_avx512dq_shuf_f64x2_mask:
37317 case CODE_FOR_avx512dq_shuf_i64x2_mask:
37318 case CODE_FOR_avx512vl_shuf_i32x4_mask:
37319 case CODE_FOR_avx512vl_shuf_f32x4_mask:
37320 error ("the last argument must be a 2-bit immediate");
37323 case CODE_FOR_avx_vextractf128v4df:
37324 case CODE_FOR_avx_vextractf128v8sf:
37325 case CODE_FOR_avx_vextractf128v8si:
37326 case CODE_FOR_avx_vinsertf128v4df:
37327 case CODE_FOR_avx_vinsertf128v8sf:
37328 case CODE_FOR_avx_vinsertf128v8si:
37329 case CODE_FOR_avx512f_vinsertf64x4_mask:
37330 case CODE_FOR_avx512f_vinserti64x4_mask:
37331 case CODE_FOR_avx512f_vextractf64x4_mask:
37332 case CODE_FOR_avx512f_vextracti64x4_mask:
37333 case CODE_FOR_avx512dq_vinsertf32x8_mask:
37334 case CODE_FOR_avx512dq_vinserti32x8_mask:
37335 case CODE_FOR_avx512vl_vinsertv4df:
37336 case CODE_FOR_avx512vl_vinsertv4di:
37337 case CODE_FOR_avx512vl_vinsertv8sf:
37338 case CODE_FOR_avx512vl_vinsertv8si:
37339 error ("the last argument must be a 1-bit immediate");
37342 case CODE_FOR_avx_vmcmpv2df3:
37343 case CODE_FOR_avx_vmcmpv4sf3:
37344 case CODE_FOR_avx_cmpv2df3:
37345 case CODE_FOR_avx_cmpv4sf3:
37346 case CODE_FOR_avx_cmpv4df3:
37347 case CODE_FOR_avx_cmpv8sf3:
37348 case CODE_FOR_avx512f_cmpv8df3_mask:
37349 case CODE_FOR_avx512f_cmpv16sf3_mask:
37350 case CODE_FOR_avx512f_vmcmpv2df3_mask:
37351 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
37352 error ("the last argument must be a 5-bit immediate");
37356 switch (nargs_constant)
37359 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37360 (!mask_pos && (nargs - i) == nargs_constant))
37362 error ("the next to last argument must be an 8-bit immediate");
37366 error ("the last argument must be an 8-bit immediate");
37369 gcc_unreachable ();
37376 if (VECTOR_MODE_P (mode))
37377 op = safe_vector_operand (op, mode);
37379 /* If we aren't optimizing, only allow one memory operand to
37381 if (memory_operand (op, mode))
37384 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37386 if (optimize || !match || num_memory > 1)
37387 op = copy_to_mode_reg (mode, op);
37391 op = copy_to_reg (op);
37392 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37397 args[i].mode = mode;
/* Emit the insn with the right arity.  */
37403 pat = GEN_FCN (icode) (real_target, args[0].op);
37406 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
37409 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37413 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37414 args[2].op, args[3].op);
37417 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37418 args[2].op, args[3].op, args[4].op);
37420 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37421 args[2].op, args[3].op, args[4].op,
37425 gcc_unreachable ();
37435 /* Transform pattern of following layout:
37438 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
37446 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
37450 (parallel [ A B ... ]) */
37453 ix86_erase_embedded_rounding (rtx pat)
/* Work on the insn's PATTERN if an INSN was passed.  */
37455 if (GET_CODE (pat) == INSN)
37456 pat = PATTERN (pat);
37458 gcc_assert (GET_CODE (pat) == PARALLEL);
/* Two-element PARALLEL: must be a SET plus the embedded-rounding
   UNSPEC; the SET alone replaces the PARALLEL.  */
37460 if (XVECLEN (pat, 0) == 2)
37462 rtx p0 = XVECEXP (pat, 0, 0);
37463 rtx p1 = XVECEXP (pat, 0, 1);
37465 gcc_assert (GET_CODE (p0) == SET
37466 && GET_CODE (p1) == UNSPEC
37467 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
/* Longer PARALLEL: copy every element except the embedded-rounding
   UNSPEC into a new vector.  */
37473 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
37477 for (; i < XVECLEN (pat, 0); ++i)
37479 rtx elem = XVECEXP (pat, 0, i);
37480 if (GET_CODE (elem) != UNSPEC
37481 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
37485 /* No more than 1 occurrence was removed. */
37486 gcc_assert (j >= XVECLEN (pat, 0) - 1);
37488 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
37492 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
/* Expand a COMI/UCOMI comparison builtin with embedded rounding (SAE).
   D describes the builtin, EXP is the CALL_EXPR, TARGET is a suggested
   result register.  Args: two vector operands, a comparison-predicate
   immediate (0..31, see avxintrin.h) and a rounding-mode immediate.
   Returns an SImode register holding the 0/1 comparison result.  */
37495 ix86_expand_sse_comi_round (const struct builtin_description *d,
37496 tree exp, rtx target)
37499 tree arg0 = CALL_EXPR_ARG (exp, 0);
37500 tree arg1 = CALL_EXPR_ARG (exp, 1);
37501 tree arg2 = CALL_EXPR_ARG (exp, 2);
37502 tree arg3 = CALL_EXPR_ARG (exp, 3);
37503 rtx op0 = expand_normal (arg0);
37504 rtx op1 = expand_normal (arg1);
37505 rtx op2 = expand_normal (arg2);
37506 rtx op3 = expand_normal (arg3);
37507 enum insn_code icode = d->icode;
37508 const struct insn_data_d *insn_p = &insn_data[icode];
37509 machine_mode mode0 = insn_p->operand[0].mode;
37510 machine_mode mode1 = insn_p->operand[1].mode;
37511 enum rtx_code comparison = UNEQ;
37512 bool need_ucomi = false;
37514 /* See avxintrin.h for values. */
/* Map the 5-bit predicate immediate to an RTL comparison code ...  */
37515 enum rtx_code comi_comparisons[32] =
37517 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37518 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37519 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
/* ... and to whether the quiet (UCOMI) variant is required.  */
37521 bool need_ucomi_values[32] =
37523 true, false, false, true, true, false, false, true,
37524 true, false, false, true, true, false, false, true,
37525 false, true, true, false, false, true, true, false,
37526 false, true, true, false, false, true, true, false
/* Diagnose malformed immediates before emitting anything.  */
37529 if (!CONST_INT_P (op2))
37531 error ("the third argument must be a comparison constant");
37534 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37536 error ("incorrect comparison mode");
37540 if (!insn_p->operand[2].predicate (op3, SImode))
37542 error ("incorrect rounding operand");
37546 comparison = comi_comparisons[INTVAL (op2)];
37547 need_ucomi = need_ucomi_values[INTVAL (op2)];
37549 if (VECTOR_MODE_P (mode0))
37550 op0 = safe_vector_operand (op0, mode0);
37551 if (VECTOR_MODE_P (mode1))
37552 op1 = safe_vector_operand (op1, mode1);
/* Result accumulates in the low byte of a zeroed SImode register.  */
37554 target = gen_reg_rtx (SImode);
37555 emit_move_insn (target, const0_rtx);
37556 target = gen_rtx_SUBREG (QImode, target, 0);
/* Force the vector operands into registers when the predicates demand.  */
37558 if ((optimize && !register_operand (op0, mode0))
37559 || !insn_p->operand[0].predicate (op0, mode0))
37560 op0 = copy_to_mode_reg (mode0, op0);
37561 if ((optimize && !register_operand (op1, mode1))
37562 || !insn_p->operand[1].predicate (op1, mode1))
37563 op1 = copy_to_mode_reg (mode1, op1);
/* Switch to the unordered (UCOMI) pattern when the predicate needs it.  */
37566 icode = icode == CODE_FOR_sse_comi_round
37567 ? CODE_FOR_sse_ucomi_round
37568 : CODE_FOR_sse2_ucomi_round;
37570 pat = GEN_FCN (icode) (op0, op1, op3);
37574 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
37575 if (INTVAL (op3) == NO_ROUND)
37577 pat = ix86_erase_embedded_rounding (pat);
37581 set_dst = SET_DEST (pat);
37585 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37586 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
/* Materialize the flags comparison into TARGET's low byte.  */
37590 emit_insn (gen_rtx_SET (VOIDmode,
37591 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37592 gen_rtx_fmt_ee (comparison, QImode,
37596 return SUBREG_REG (target);
/* Expand a builtin with an explicit rounding-mode operand.  D describes
   the builtin, EXP is the CALL_EXPR, TARGET a suggested result register.
   Dispatches on the builtin's function type to find the argument count
   and which trailing arguments must be immediates, then emits the insn,
   erasing the embedded-rounding marker when rounding is NO_ROUND.  */
37600 ix86_expand_round_builtin (const struct builtin_description *d,
37601 tree exp, rtx target)
37604 unsigned int i, nargs;
37610 enum insn_code icode = d->icode;
37611 const struct insn_data_d *insn_p = &insn_data[icode];
37612 machine_mode tmode = insn_p->operand[0].mode;
37613 unsigned int nargs_constant = 0;
37614 unsigned int redundant_embed_rnd = 0;
/* Derive NARGS / NARGS_CONSTANT from the builtin's prototype.  */
37616 switch ((enum ix86_builtin_func_type) d->flag)
37618 case UINT64_FTYPE_V2DF_INT:
37619 case UINT64_FTYPE_V4SF_INT:
37620 case UINT_FTYPE_V2DF_INT:
37621 case UINT_FTYPE_V4SF_INT:
37622 case INT64_FTYPE_V2DF_INT:
37623 case INT64_FTYPE_V4SF_INT:
37624 case INT_FTYPE_V2DF_INT:
37625 case INT_FTYPE_V4SF_INT:
37628 case V4SF_FTYPE_V4SF_UINT_INT:
37629 case V4SF_FTYPE_V4SF_UINT64_INT:
37630 case V2DF_FTYPE_V2DF_UINT64_INT:
37631 case V4SF_FTYPE_V4SF_INT_INT:
37632 case V4SF_FTYPE_V4SF_INT64_INT:
37633 case V2DF_FTYPE_V2DF_INT64_INT:
37634 case V4SF_FTYPE_V4SF_V4SF_INT:
37635 case V2DF_FTYPE_V2DF_V2DF_INT:
37636 case V4SF_FTYPE_V4SF_V2DF_INT:
37637 case V2DF_FTYPE_V2DF_V4SF_INT:
37640 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37641 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37642 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37643 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37644 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37645 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37646 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37647 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37648 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37649 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37650 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37651 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37652 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37653 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37656 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37657 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37658 nargs_constant = 2;
/* COMI-style comparisons are handled by a dedicated helper.  */
37661 case INT_FTYPE_V4SF_V4SF_INT_INT:
37662 case INT_FTYPE_V2DF_V2DF_INT_INT:
37663 return ix86_expand_sse_comi_round (d, exp, target);
37664 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
37665 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37666 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37667 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
37668 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
37669 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
37672 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
37673 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
37674 nargs_constant = 4;
37677 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
37678 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
37679 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
37680 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
37681 nargs_constant = 3;
37684 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
37685 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
37686 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
37687 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
37689 nargs_constant = 4;
37691 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
37692 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
37693 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
37694 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
37696 nargs_constant = 3;
37699 gcc_unreachable ();
37701 gcc_assert (nargs <= ARRAY_SIZE (args));
/* Make sure TARGET is a fresh register of the right mode if unusable.  */
37705 || GET_MODE (target) != tmode
37706 || !insn_p->operand[0].predicate (target, tmode)
37707 target = gen_reg_rtx (tmode);
/* Expand each argument and legitimize it for its insn operand slot.  */
37709 for (i = 0; i < nargs; i++)
37711 tree arg = CALL_EXPR_ARG (exp, i);
37712 rtx op = expand_normal (arg);
37713 machine_mode mode = insn_p->operand[i + 1].mode;
37714 bool match = insn_p->operand[i + 1].predicate (op, mode);
/* First of the trailing immediates: validate its bit-width per insn.  */
37716 if (i == nargs - nargs_constant)
37722 case CODE_FOR_avx512f_getmantv8df_mask_round:
37723 case CODE_FOR_avx512f_getmantv16sf_mask_round:
37724 case CODE_FOR_avx512f_vgetmantv2df_round:
37725 case CODE_FOR_avx512f_vgetmantv4sf_round:
37726 error ("the immediate argument must be a 4-bit immediate");
37728 case CODE_FOR_avx512f_cmpv8df3_mask_round:
37729 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
37730 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
37731 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
37732 error ("the immediate argument must be a 5-bit immediate");
37735 error ("the immediate argument must be an 8-bit immediate");
/* Last argument is the rounding-mode immediate.  */
37740 else if (i == nargs-1)
37742 if (!insn_p->operand[nargs].predicate (op, SImode))
37744 error ("incorrect rounding operand");
37748 /* If there is no rounding use normal version of the pattern. */
37749 if (INTVAL (op) == NO_ROUND)
37750 redundant_embed_rnd = 1;
37754 if (VECTOR_MODE_P (mode))
37755 op = safe_vector_operand (op, mode);
37757 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37759 if (optimize || !match)
37760 op = copy_to_mode_reg (mode, op);
/* Mode mismatch: copy to a register and reinterpret via subreg.  */
37764 op = copy_to_reg (op);
37765 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37770 args[i].mode = mode;
/* Emit the insn with the right arity.  */
37776 pat = GEN_FCN (icode) (target, args[0].op);
37779 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37782 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37786 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37787 args[2].op, args[3].op);
37790 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37791 args[2].op, args[3].op, args[4].op);
37793 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37794 args[2].op, args[3].op, args[4].op,
37798 gcc_unreachable ();
/* Drop the embedded-rounding marker when rounding was NO_ROUND.  */
37804 if (redundant_embed_rnd)
37805 pat = ix86_erase_embedded_rounding (pat);
37811 /* Subroutine of ix86_expand_builtin to take care of special insns
37812 with variable number of operands. */
/* D describes the builtin, EXP is the CALL_EXPR, TARGET a suggested result.
   Classifies the builtin as a load or store from its prototype, reserves a
   memory operand where needed, marks instructions that require strictly
   aligned memory, then legitimizes each argument and emits the insn.
   Returns 0 for stores, otherwise the result register.  */
37815 ix86_expand_special_args_builtin (const struct builtin_description *d,
37816 tree exp, rtx target)
37820 unsigned int i, nargs, arg_adjust, memory;
37821 bool aligned_mem = false;
37827 enum insn_code icode = d->icode;
37828 bool last_arg_constant = false;
37829 const struct insn_data_d *insn_p = &insn_data[icode];
37830 machine_mode tmode = insn_p->operand[0].mode;
37831 enum { load, store } klass;
/* Derive NARGS, KLASS and the memory-operand slot from the prototype.  */
37833 switch ((enum ix86_builtin_func_type) d->flag)
37835 case VOID_FTYPE_VOID:
37836 emit_insn (GEN_FCN (icode) (target));
37838 case VOID_FTYPE_UINT64:
37839 case VOID_FTYPE_UNSIGNED:
37845 case INT_FTYPE_VOID:
37846 case USHORT_FTYPE_VOID:
37847 case UINT64_FTYPE_VOID:
37848 case UNSIGNED_FTYPE_VOID:
/* Loads through a pointer argument.  */
37853 case UINT64_FTYPE_PUNSIGNED:
37854 case V2DI_FTYPE_PV2DI:
37855 case V4DI_FTYPE_PV4DI:
37856 case V32QI_FTYPE_PCCHAR:
37857 case V16QI_FTYPE_PCCHAR:
37858 case V8SF_FTYPE_PCV4SF:
37859 case V8SF_FTYPE_PCFLOAT:
37860 case V4SF_FTYPE_PCFLOAT:
37861 case V4DF_FTYPE_PCV2DF:
37862 case V4DF_FTYPE_PCDOUBLE:
37863 case V2DF_FTYPE_PCDOUBLE:
37864 case VOID_FTYPE_PVOID:
37865 case V16SI_FTYPE_PV4SI:
37866 case V16SF_FTYPE_PV4SF:
37867 case V8DI_FTYPE_PV4DI:
37868 case V8DI_FTYPE_PV8DI:
37869 case V8DF_FTYPE_PV4DF:
/* Non-temporal loads require an aligned memory source.  */
37875 case CODE_FOR_sse4_1_movntdqa:
37876 case CODE_FOR_avx2_movntdqa:
37877 case CODE_FOR_avx512f_movntdqa:
37878 aligned_mem = true;
/* Stores through a pointer argument.  */
37884 case VOID_FTYPE_PV2SF_V4SF:
37885 case VOID_FTYPE_PV8DI_V8DI:
37886 case VOID_FTYPE_PV4DI_V4DI:
37887 case VOID_FTYPE_PV2DI_V2DI:
37888 case VOID_FTYPE_PCHAR_V32QI:
37889 case VOID_FTYPE_PCHAR_V16QI:
37890 case VOID_FTYPE_PFLOAT_V16SF:
37891 case VOID_FTYPE_PFLOAT_V8SF:
37892 case VOID_FTYPE_PFLOAT_V4SF:
37893 case VOID_FTYPE_PDOUBLE_V8DF:
37894 case VOID_FTYPE_PDOUBLE_V4DF:
37895 case VOID_FTYPE_PDOUBLE_V2DF:
37896 case VOID_FTYPE_PLONGLONG_LONGLONG:
37897 case VOID_FTYPE_PULONGLONG_ULONGLONG:
37898 case VOID_FTYPE_PINT_INT:
37901 /* Reserve memory operand for target. */
37902 memory = ARRAY_SIZE (args);
37905 /* These builtins and instructions require the memory
37906 to be properly aligned. */
37907 case CODE_FOR_avx_movntv4di:
37908 case CODE_FOR_sse2_movntv2di:
37909 case CODE_FOR_avx_movntv8sf:
37910 case CODE_FOR_sse_movntv4sf:
37911 case CODE_FOR_sse4a_vmmovntv4sf:
37912 case CODE_FOR_avx_movntv4df:
37913 case CODE_FOR_sse2_movntv2df:
37914 case CODE_FOR_sse4a_vmmovntv2df:
37915 case CODE_FOR_sse2_movntidi:
37916 case CODE_FOR_sse_movntq:
37917 case CODE_FOR_sse2_movntisi:
37918 case CODE_FOR_avx512f_movntv16sf:
37919 case CODE_FOR_avx512f_movntv8df:
37920 case CODE_FOR_avx512f_movntv8di:
37921 aligned_mem = true;
37927 case V4SF_FTYPE_V4SF_PCV2SF:
37928 case V2DF_FTYPE_V2DF_PCDOUBLE:
/* Gather/masked loads: pointer plus index/mask vector.  */
37933 case V8SF_FTYPE_PCV8SF_V8SI:
37934 case V4DF_FTYPE_PCV4DF_V4DI:
37935 case V4SF_FTYPE_PCV4SF_V4SI:
37936 case V2DF_FTYPE_PCV2DF_V2DI:
37937 case V8SI_FTYPE_PCV8SI_V8SI:
37938 case V4DI_FTYPE_PCV4DI_V4DI:
37939 case V4SI_FTYPE_PCV4SI_V4SI:
37940 case V2DI_FTYPE_PCV2DI_V2DI:
/* Masked stores.  */
37945 case VOID_FTYPE_PV8DF_V8DF_QI:
37946 case VOID_FTYPE_PV16SF_V16SF_HI:
37947 case VOID_FTYPE_PV8DI_V8DI_QI:
37948 case VOID_FTYPE_PV4DI_V4DI_QI:
37949 case VOID_FTYPE_PV2DI_V2DI_QI:
37950 case VOID_FTYPE_PV16SI_V16SI_HI:
37951 case VOID_FTYPE_PV8SI_V8SI_QI:
37952 case VOID_FTYPE_PV4SI_V4SI_QI:
37955 /* These builtins and instructions require the memory
37956 to be properly aligned. */
37957 case CODE_FOR_avx512f_storev16sf_mask:
37958 case CODE_FOR_avx512f_storev16si_mask:
37959 case CODE_FOR_avx512f_storev8df_mask:
37960 case CODE_FOR_avx512f_storev8di_mask:
37961 case CODE_FOR_avx512vl_storev8sf_mask:
37962 case CODE_FOR_avx512vl_storev8si_mask:
37963 case CODE_FOR_avx512vl_storev4df_mask:
37964 case CODE_FOR_avx512vl_storev4di_mask:
37965 case CODE_FOR_avx512vl_storev4sf_mask:
37966 case CODE_FOR_avx512vl_storev4si_mask:
37967 case CODE_FOR_avx512vl_storev2df_mask:
37968 case CODE_FOR_avx512vl_storev2di_mask:
37969 aligned_mem = true;
37975 case VOID_FTYPE_PV8SF_V8SI_V8SF:
37976 case VOID_FTYPE_PV4DF_V4DI_V4DF:
37977 case VOID_FTYPE_PV4SF_V4SI_V4SF:
37978 case VOID_FTYPE_PV2DF_V2DI_V2DF:
37979 case VOID_FTYPE_PV8SI_V8SI_V8SI:
37980 case VOID_FTYPE_PV4DI_V4DI_V4DI:
37981 case VOID_FTYPE_PV4SI_V4SI_V4SI:
37982 case VOID_FTYPE_PV2DI_V2DI_V2DI:
37983 case VOID_FTYPE_PDOUBLE_V2DF_QI:
37984 case VOID_FTYPE_PFLOAT_V4SF_QI:
37985 case VOID_FTYPE_PV8SI_V8DI_QI:
37986 case VOID_FTYPE_PV8HI_V8DI_QI:
37987 case VOID_FTYPE_PV16HI_V16SI_HI:
37988 case VOID_FTYPE_PV16QI_V8DI_QI:
37989 case VOID_FTYPE_PV16QI_V16SI_HI:
37990 case VOID_FTYPE_PV4SI_V4DI_QI:
37991 case VOID_FTYPE_PV4SI_V2DI_QI:
37992 case VOID_FTYPE_PV8HI_V4DI_QI:
37993 case VOID_FTYPE_PV8HI_V2DI_QI:
37994 case VOID_FTYPE_PV8HI_V8SI_QI:
37995 case VOID_FTYPE_PV8HI_V4SI_QI:
37996 case VOID_FTYPE_PV16QI_V4DI_QI:
37997 case VOID_FTYPE_PV16QI_V2DI_QI:
37998 case VOID_FTYPE_PV16QI_V8SI_QI:
37999 case VOID_FTYPE_PV16QI_V4SI_QI:
38000 case VOID_FTYPE_PV8HI_V8HI_QI:
38001 case VOID_FTYPE_PV16HI_V16HI_HI:
38002 case VOID_FTYPE_PV32HI_V32HI_SI:
38003 case VOID_FTYPE_PV16QI_V16QI_HI:
38004 case VOID_FTYPE_PV32QI_V32QI_SI:
38005 case VOID_FTYPE_PV64QI_V64QI_DI:
38006 case VOID_FTYPE_PV4DF_V4DF_QI:
38007 case VOID_FTYPE_PV2DF_V2DF_QI:
38008 case VOID_FTYPE_PV8SF_V8SF_QI:
38009 case VOID_FTYPE_PV4SF_V4SF_QI:
38012 /* Reserve memory operand for target. */
38013 memory = ARRAY_SIZE (args);
/* Masked loads.  */
38015 case V4SF_FTYPE_PCV4SF_V4SF_QI:
38016 case V8SF_FTYPE_PCV8SF_V8SF_QI:
38017 case V16SF_FTYPE_PCV16SF_V16SF_HI:
38018 case V4SI_FTYPE_PCV4SI_V4SI_QI:
38019 case V8SI_FTYPE_PCV8SI_V8SI_QI:
38020 case V16SI_FTYPE_PCV16SI_V16SI_HI:
38021 case V2DF_FTYPE_PCV2DF_V2DF_QI:
38022 case V4DF_FTYPE_PCV4DF_V4DF_QI:
38023 case V8DF_FTYPE_PCV8DF_V8DF_QI:
38024 case V2DI_FTYPE_PCV2DI_V2DI_QI:
38025 case V4DI_FTYPE_PCV4DI_V4DI_QI:
38026 case V8DI_FTYPE_PCV8DI_V8DI_QI:
38027 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
38028 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
38029 case V8HI_FTYPE_PCV8HI_V8HI_QI:
38030 case V16HI_FTYPE_PCV16HI_V16HI_HI:
38031 case V32HI_FTYPE_PCV32HI_V32HI_SI:
38032 case V16QI_FTYPE_PCV16QI_V16QI_HI:
38033 case V32QI_FTYPE_PCV32QI_V32QI_SI:
38034 case V64QI_FTYPE_PCV64QI_V64QI_DI:
38040 /* These builtins and instructions require the memory
38041 to be properly aligned. */
38042 case CODE_FOR_avx512f_loadv16sf_mask:
38043 case CODE_FOR_avx512f_loadv16si_mask:
38044 case CODE_FOR_avx512f_loadv8df_mask:
38045 case CODE_FOR_avx512f_loadv8di_mask:
38046 case CODE_FOR_avx512vl_loadv8sf_mask:
38047 case CODE_FOR_avx512vl_loadv8si_mask:
38048 case CODE_FOR_avx512vl_loadv4df_mask:
38049 case CODE_FOR_avx512vl_loadv4di_mask:
38050 case CODE_FOR_avx512vl_loadv4sf_mask:
38051 case CODE_FOR_avx512vl_loadv4si_mask:
38052 case CODE_FOR_avx512vl_loadv2df_mask:
38053 case CODE_FOR_avx512vl_loadv2di_mask:
38054 case CODE_FOR_avx512bw_loadv64qi_mask:
38055 case CODE_FOR_avx512vl_loadv32qi_mask:
38056 case CODE_FOR_avx512vl_loadv16qi_mask:
38057 case CODE_FOR_avx512bw_loadv32hi_mask:
38058 case CODE_FOR_avx512vl_loadv16hi_mask:
38059 case CODE_FOR_avx512vl_loadv8hi_mask:
38060 aligned_mem = true;
/* LWP-style builtins: trailing argument must be an immediate.  */
38066 case VOID_FTYPE_UINT_UINT_UINT:
38067 case VOID_FTYPE_UINT64_UINT_UINT:
38068 case UCHAR_FTYPE_UINT_UINT_UINT:
38069 case UCHAR_FTYPE_UINT64_UINT_UINT:
38072 memory = ARRAY_SIZE (args);
38073 last_arg_constant = true;
38076 gcc_unreachable ();
38079 gcc_assert (nargs <= ARRAY_SIZE (args));
/* For stores, the destination memory comes from argument 0.  */
38081 if (klass == store)
38083 arg = CALL_EXPR_ARG (exp, 0);
38084 op = expand_normal (arg);
38085 gcc_assert (target == 0);
38088 op = ix86_zero_extend_to_Pmode (op);
38089 target = gen_rtx_MEM (tmode, op);
38090 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
38091 on it. Try to improve it using get_pointer_alignment,
38092 and if the special builtin is one that requires strict
38093 mode alignment, also from its GET_MODE_ALIGNMENT.
38094 Failure to do so could lead to ix86_legitimate_combined_insn
38095 rejecting all changes to such insns. */
38096 unsigned int align = get_pointer_alignment (arg);
38097 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
38098 align = GET_MODE_ALIGNMENT (tmode);
38099 if (MEM_ALIGN (target) < align)
38100 set_mem_align (target, align);
38103 target = force_reg (tmode, op);
/* Loads: ensure TARGET is a usable register of mode TMODE.  */
38111 || !register_operand (target, tmode)
38112 || GET_MODE (target) != tmode
38113 target = gen_reg_rtx (tmode);
/* Expand and legitimize each remaining argument.  */
38116 for (i = 0; i < nargs; i++)
38118 machine_mode mode = insn_p->operand[i + 1].mode;
38121 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
38122 op = expand_normal (arg);
38123 match = insn_p->operand[i + 1].predicate (op, mode);
38125 if (last_arg_constant && (i + 1) == nargs)
38129 if (icode == CODE_FOR_lwp_lwpvalsi3
38130 || icode == CODE_FOR_lwp_lwpinssi3
38131 || icode == CODE_FOR_lwp_lwpvaldi3
38132 || icode == CODE_FOR_lwp_lwpinsdi3)
38133 error ("the last argument must be a 32-bit immediate");
38135 error ("the last argument must be an 8-bit immediate");
38143 /* This must be the memory operand. */
38144 op = ix86_zero_extend_to_Pmode (op);
38145 op = gen_rtx_MEM (mode, op);
38146 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
38147 on it. Try to improve it using get_pointer_alignment,
38148 and if the special builtin is one that requires strict
38149 mode alignment, also from its GET_MODE_ALIGNMENT.
38150 Failure to do so could lead to ix86_legitimate_combined_insn
38151 rejecting all changes to such insns. */
38152 unsigned int align = get_pointer_alignment (arg);
38153 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
38154 align = GET_MODE_ALIGNMENT (mode);
38155 if (MEM_ALIGN (op) < align)
38156 set_mem_align (op, align);
38160 /* This must be register. */
38161 if (VECTOR_MODE_P (mode))
38162 op = safe_vector_operand (op, mode);
38164 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38165 op = copy_to_mode_reg (mode, op);
/* Mode mismatch: copy to a register and reinterpret via subreg.  */
38168 op = copy_to_reg (op);
38169 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38175 args[i].mode = mode;
/* Emit the insn with the right arity.  */
38181 pat = GEN_FCN (icode) (target);
38184 pat = GEN_FCN (icode) (target, args[0].op);
38187 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38190 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38193 gcc_unreachable ();
/* Stores have no result value.  */
38199 return klass == store ? 0 : target;
38202 /* Return the integer constant in ARG. Constrain it to be in the range
38203 of the subparts of VEC_TYPE; issue an error if not. */
38206 get_element_number (tree vec_type, tree arg)
38208 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* Reject selectors that are not constants or exceed MAX.  */
38210 if (!tree_fits_uhwi_p (arg)
38211 || (elt = tree_to_uhwi (arg), elt > max))
38213 error ("selector must be an integer constant in the range 0..%wi", max);
38220 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38221 ix86_expand_vector_init. We DO have language-level syntax for this, in
38222 the form of (type){ init-list }. Except that since we can't place emms
38223 instructions from inside the compiler, we can't allow the use of MMX
38224 registers unless the user explicitly asks for it. So we do *not* define
38225 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
38226 we have builtins invoked by mmintrin.h that gives us license to emit
38227 these sorts of instructions. */
/* TYPE is the vector type being built, EXP the CALL_EXPR whose arguments
   are the element initializers, TARGET a suggested result register.  */
38230 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
38232 machine_mode tmode = TYPE_MODE (type);
38233 machine_mode inner_mode = GET_MODE_INNER (tmode);
38234 int i, n_elt = GET_MODE_NUNITS (tmode);
38235 rtvec v = rtvec_alloc (n_elt);
38237 gcc_assert (VECTOR_MODE_P (tmode));
38238 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand each call argument as one vector element.  */
38240 for (i = 0; i < n_elt; ++i)
38242 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38243 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38246 if (!target || !register_operand (target, tmode))
38247 target = gen_reg_rtx (tmode);
38249 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38253 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38254 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
38255 had a language-level syntax for referencing vector elements. */
/* EXP is the CALL_EXPR: arg 0 is the vector, arg 1 the constant element
   selector.  TARGET is a suggested result register.  */
38258 ix86_expand_vec_ext_builtin (tree exp, rtx target)
38260 machine_mode tmode, mode0;
38265 arg0 = CALL_EXPR_ARG (exp, 0);
38266 arg1 = CALL_EXPR_ARG (exp, 1);
38268 op0 = expand_normal (arg0);
/* Validate and fetch the element index (errors on bad selectors).  */
38269 elt = get_element_number (TREE_TYPE (arg0), arg1);
38271 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38272 mode0 = TYPE_MODE (TREE_TYPE (arg0));
38273 gcc_assert (VECTOR_MODE_P (mode0));
38275 op0 = force_reg (mode0, op0);
38277 if (optimize || !target || !register_operand (target, tmode))
38278 target = gen_reg_rtx (tmode);
38280 ix86_expand_vector_extract (true, target, op0, elt);
38285 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38286 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
38287 a language-level syntax for referencing vector elements. */
/* EXP is the CALL_EXPR: arg 0 is the source vector, arg 1 the new element
   value, arg 2 the constant element selector.  Returns a fresh register
   holding the updated vector (the source operand is left unmodified).  */
38290 ix86_expand_vec_set_builtin (tree exp)
38292 machine_mode tmode, mode1;
38293 tree arg0, arg1, arg2;
38295 rtx op0, op1, target;
38297 arg0 = CALL_EXPR_ARG (exp, 0);
38298 arg1 = CALL_EXPR_ARG (exp, 1);
38299 arg2 = CALL_EXPR_ARG (exp, 2);
38301 tmode = TYPE_MODE (TREE_TYPE (arg0));
38302 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38303 gcc_assert (VECTOR_MODE_P (tmode));
38305 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38306 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
/* Validate and fetch the element index (errors on bad selectors).  */
38307 elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Convert the scalar to the vector's element mode if needed.  */
38309 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38310 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38312 op0 = force_reg (tmode, op0);
38313 op1 = force_reg (mode1, op1);
38315 /* OP0 is the source of these builtin functions and shouldn't be
38316 modified. Create a copy, use it and return it as target. */
38317 target = gen_reg_rtx (tmode);
38318 emit_move_insn (target, op0);
38319 ix86_expand_vector_set (true, target, op1, elt);
38324 /* Emit conditional move of SRC to DST with condition
/* ... CODE applied to OP1 and OP2.  Uses a real conditional-move insn
   when available, otherwise branches around a plain move.  */
38327 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
38333 t = ix86_expand_compare (code, op1, op2);
38334 emit_insn (gen_rtx_SET (VOIDmode, dst,
38335 gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
/* Fallback: jump past the move when the reversed condition holds.  */
38340 rtx nomove = gen_label_rtx ();
38341 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38342 const0_rtx, GET_MODE (op1), 1, nomove);
38343 emit_move_insn (dst, src);
38344 emit_label (nomove);
38348 /* Choose max of DST and SRC and put it to DST. */
38350 ix86_emit_move_max (rtx dst, rtx src)
/* Unsigned maximum: move SRC into DST when DST < SRC (LTU).  */
38352 ix86_emit_cmove (dst, src, LTU, dst, src);
38355 /* Expand an expression EXP that calls a built-in function,
38356 with result going to TARGET if that's convenient
38357 (and in mode MODE if that's convenient).
38358 SUBTARGET may be used as the target for computing one of EXP's operands.
38359 IGNORE is nonzero if the value is to be ignored. */
38362 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38363 machine_mode mode, int ignore)
38365 const struct builtin_description *d;
38367 enum insn_code icode;
38368 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38369 tree arg0, arg1, arg2, arg3, arg4;
38370 rtx op0, op1, op2, op3, op4, pat, insn;
38371 machine_mode mode0, mode1, mode2, mode3, mode4;
38372 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38374 /* For CPU builtins that can be folded, fold first and expand the fold. */
38377 case IX86_BUILTIN_CPU_INIT:
38379 /* Make it call __cpu_indicator_init in libgcc. */
38380 tree call_expr, fndecl, type;
38381 type = build_function_type_list (integer_type_node, NULL_TREE);
38382 fndecl = build_fn_decl ("__cpu_indicator_init", type);
38383 call_expr = build_call_expr (fndecl, 0);
38384 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38386 case IX86_BUILTIN_CPU_IS:
38387 case IX86_BUILTIN_CPU_SUPPORTS:
38389 tree arg0 = CALL_EXPR_ARG (exp, 0);
38390 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38391 gcc_assert (fold_expr != NULL_TREE);
38392 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
38396 /* Determine whether the builtin function is available under the current ISA.
38397 Originally the builtin was not created if it wasn't applicable to the
38398 current ISA based on the command line switches. With function specific
38399 options, we need to check in the context of the function making the call
38400 whether it is supported. */
38401 if (ix86_builtins_isa[fcode].isa
38402 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38404 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38405 NULL, (enum fpmath_unit) 0, false);
38408 error ("%qE needs unknown isa option", fndecl);
38411 gcc_assert (opts != NULL);
38412 error ("%qE needs isa option %s", fndecl, opts);
38420 case IX86_BUILTIN_BNDMK:
38422 || GET_MODE (target) != BNDmode
38423 || !register_operand (target, BNDmode))
38424 target = gen_reg_rtx (BNDmode);
38426 arg0 = CALL_EXPR_ARG (exp, 0);
38427 arg1 = CALL_EXPR_ARG (exp, 1);
38429 op0 = expand_normal (arg0);
38430 op1 = expand_normal (arg1);
38432 if (!register_operand (op0, Pmode))
38433 op0 = ix86_zero_extend_to_Pmode (op0);
38434 if (!register_operand (op1, Pmode))
38435 op1 = ix86_zero_extend_to_Pmode (op1);
38437 /* Builtin arg1 is size of block but instruction op1 should
38439 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38440 NULL_RTX, 1, OPTAB_DIRECT);
38442 emit_insn (BNDmode == BND64mode
38443 ? gen_bnd64_mk (target, op0, op1)
38444 : gen_bnd32_mk (target, op0, op1));
38447 case IX86_BUILTIN_BNDSTX:
38448 arg0 = CALL_EXPR_ARG (exp, 0);
38449 arg1 = CALL_EXPR_ARG (exp, 1);
38450 arg2 = CALL_EXPR_ARG (exp, 2);
38452 op0 = expand_normal (arg0);
38453 op1 = expand_normal (arg1);
38454 op2 = expand_normal (arg2);
38456 if (!register_operand (op0, Pmode))
38457 op0 = ix86_zero_extend_to_Pmode (op0);
38458 if (!register_operand (op1, BNDmode))
38459 op1 = copy_to_mode_reg (BNDmode, op1);
38460 if (!register_operand (op2, Pmode))
38461 op2 = ix86_zero_extend_to_Pmode (op2);
38463 emit_insn (BNDmode == BND64mode
38464 ? gen_bnd64_stx (op2, op0, op1)
38465 : gen_bnd32_stx (op2, op0, op1));
38468 case IX86_BUILTIN_BNDLDX:
38470 || GET_MODE (target) != BNDmode
38471 || !register_operand (target, BNDmode))
38472 target = gen_reg_rtx (BNDmode);
38474 arg0 = CALL_EXPR_ARG (exp, 0);
38475 arg1 = CALL_EXPR_ARG (exp, 1);
38477 op0 = expand_normal (arg0);
38478 op1 = expand_normal (arg1);
38480 if (!register_operand (op0, Pmode))
38481 op0 = ix86_zero_extend_to_Pmode (op0);
38482 if (!register_operand (op1, Pmode))
38483 op1 = ix86_zero_extend_to_Pmode (op1);
38485 emit_insn (BNDmode == BND64mode
38486 ? gen_bnd64_ldx (target, op0, op1)
38487 : gen_bnd32_ldx (target, op0, op1));
38490 case IX86_BUILTIN_BNDCL:
38491 arg0 = CALL_EXPR_ARG (exp, 0);
38492 arg1 = CALL_EXPR_ARG (exp, 1);
38494 op0 = expand_normal (arg0);
38495 op1 = expand_normal (arg1);
38497 if (!register_operand (op0, Pmode))
38498 op0 = ix86_zero_extend_to_Pmode (op0);
38499 if (!register_operand (op1, BNDmode))
38500 op1 = copy_to_mode_reg (BNDmode, op1);
38502 emit_insn (BNDmode == BND64mode
38503 ? gen_bnd64_cl (op1, op0)
38504 : gen_bnd32_cl (op1, op0));
38507 case IX86_BUILTIN_BNDCU:
38508 arg0 = CALL_EXPR_ARG (exp, 0);
38509 arg1 = CALL_EXPR_ARG (exp, 1);
38511 op0 = expand_normal (arg0);
38512 op1 = expand_normal (arg1);
38514 if (!register_operand (op0, Pmode))
38515 op0 = ix86_zero_extend_to_Pmode (op0);
38516 if (!register_operand (op1, BNDmode))
38517 op1 = copy_to_mode_reg (BNDmode, op1);
38519 emit_insn (BNDmode == BND64mode
38520 ? gen_bnd64_cu (op1, op0)
38521 : gen_bnd32_cu (op1, op0));
38524 case IX86_BUILTIN_BNDRET:
38525 arg0 = CALL_EXPR_ARG (exp, 0);
38526 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38527 target = chkp_get_rtl_bounds (arg0);
38529 /* If no bounds were specified for returned value,
38530 then use INIT bounds. It usually happens when
38531 some built-in function is expanded. */
38534 rtx t1 = gen_reg_rtx (Pmode);
38535 rtx t2 = gen_reg_rtx (Pmode);
38536 target = gen_reg_rtx (BNDmode);
38537 emit_move_insn (t1, const0_rtx);
38538 emit_move_insn (t2, constm1_rtx);
38539 emit_insn (BNDmode == BND64mode
38540 ? gen_bnd64_mk (target, t1, t2)
38541 : gen_bnd32_mk (target, t1, t2));
38544 gcc_assert (target && REG_P (target));
38547 case IX86_BUILTIN_BNDNARROW:
38549 rtx m1, m1h1, m1h2, lb, ub, t1;
38551 /* Return value and lb. */
38552 arg0 = CALL_EXPR_ARG (exp, 0);
38554 arg1 = CALL_EXPR_ARG (exp, 1);
38556 arg2 = CALL_EXPR_ARG (exp, 2);
38558 lb = expand_normal (arg0);
38559 op1 = expand_normal (arg1);
38560 op2 = expand_normal (arg2);
38562 /* Size was passed but we need to use (size - 1) as for bndmk. */
38563 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38564 NULL_RTX, 1, OPTAB_DIRECT);
38566 /* Add LB to size and inverse to get UB. */
38567 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38568 op2, 1, OPTAB_DIRECT);
38569 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
38571 if (!register_operand (lb, Pmode))
38572 lb = ix86_zero_extend_to_Pmode (lb);
38573 if (!register_operand (ub, Pmode))
38574 ub = ix86_zero_extend_to_Pmode (ub);
38576 /* We need to move bounds to memory before any computations. */
38581 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38582 emit_move_insn (m1, op1);
38585 /* Generate mem expression to be used for access to LB and UB. */
38586 m1h1 = adjust_address (m1, Pmode, 0);
38587 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38589 t1 = gen_reg_rtx (Pmode);
38592 emit_move_insn (t1, m1h1);
38593 ix86_emit_move_max (t1, lb);
38594 emit_move_insn (m1h1, t1);
38596 /* Compute UB. UB is stored in 1's complement form. Therefore
38597 we also use max here. */
38598 emit_move_insn (t1, m1h2);
38599 ix86_emit_move_max (t1, ub);
38600 emit_move_insn (m1h2, t1);
38602 op2 = gen_reg_rtx (BNDmode);
38603 emit_move_insn (op2, m1);
38605 return chkp_join_splitted_slot (lb, op2);
38608 case IX86_BUILTIN_BNDINT:
38610 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38613 || GET_MODE (target) != BNDmode
38614 || !register_operand (target, BNDmode))
38615 target = gen_reg_rtx (BNDmode);
38617 arg0 = CALL_EXPR_ARG (exp, 0);
38618 arg1 = CALL_EXPR_ARG (exp, 1);
38620 op0 = expand_normal (arg0);
38621 op1 = expand_normal (arg1);
38623 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38624 rh1 = adjust_address (res, Pmode, 0);
38625 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38627 /* Put first bounds to temporaries. */
38628 lb1 = gen_reg_rtx (Pmode);
38629 ub1 = gen_reg_rtx (Pmode);
38632 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38633 emit_move_insn (ub1, adjust_address (op0, Pmode,
38634 GET_MODE_SIZE (Pmode)));
38638 emit_move_insn (res, op0);
38639 emit_move_insn (lb1, rh1);
38640 emit_move_insn (ub1, rh2);
38643 /* Put second bounds to temporaries. */
38644 lb2 = gen_reg_rtx (Pmode);
38645 ub2 = gen_reg_rtx (Pmode);
38648 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38649 emit_move_insn (ub2, adjust_address (op1, Pmode,
38650 GET_MODE_SIZE (Pmode)));
38654 emit_move_insn (res, op1);
38655 emit_move_insn (lb2, rh1);
38656 emit_move_insn (ub2, rh2);
38660 ix86_emit_move_max (lb1, lb2);
38661 emit_move_insn (rh1, lb1);
38663 /* Compute UB. UB is stored in 1's complement form. Therefore
38664 we also use max here. */
38665 ix86_emit_move_max (ub1, ub2);
38666 emit_move_insn (rh2, ub1);
38668 emit_move_insn (target, res);
38673 case IX86_BUILTIN_SIZEOF:
38679 || GET_MODE (target) != Pmode
38680 || !register_operand (target, Pmode))
38681 target = gen_reg_rtx (Pmode);
38683 arg0 = CALL_EXPR_ARG (exp, 0);
38684 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
38686 name = DECL_ASSEMBLER_NAME (arg0);
38687 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
38689 emit_insn (Pmode == SImode
38690 ? gen_move_size_reloc_si (target, symbol)
38691 : gen_move_size_reloc_di (target, symbol));
38696 case IX86_BUILTIN_BNDLOWER:
38701 || GET_MODE (target) != Pmode
38702 || !register_operand (target, Pmode))
38703 target = gen_reg_rtx (Pmode);
38705 arg0 = CALL_EXPR_ARG (exp, 0);
38706 op0 = expand_normal (arg0);
38708 /* We need to move bounds to memory first. */
38713 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38714 emit_move_insn (mem, op0);
38717 /* Generate mem expression to access LB and load it. */
38718 hmem = adjust_address (mem, Pmode, 0);
38719 emit_move_insn (target, hmem);
38724 case IX86_BUILTIN_BNDUPPER:
38726 rtx mem, hmem, res;
38729 || GET_MODE (target) != Pmode
38730 || !register_operand (target, Pmode))
38731 target = gen_reg_rtx (Pmode);
38733 arg0 = CALL_EXPR_ARG (exp, 0);
38734 op0 = expand_normal (arg0);
38736 /* We need to move bounds to memory first. */
38741 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38742 emit_move_insn (mem, op0);
38745 /* Generate mem expression to access UB. */
38746 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
38748 /* We need to inverse all bits of UB. */
38749 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
38752 emit_move_insn (target, res);
38757 case IX86_BUILTIN_MASKMOVQ:
38758 case IX86_BUILTIN_MASKMOVDQU:
38759 icode = (fcode == IX86_BUILTIN_MASKMOVQ
38760 ? CODE_FOR_mmx_maskmovq
38761 : CODE_FOR_sse2_maskmovdqu);
38762 /* Note the arg order is different from the operand order. */
38763 arg1 = CALL_EXPR_ARG (exp, 0);
38764 arg2 = CALL_EXPR_ARG (exp, 1);
38765 arg0 = CALL_EXPR_ARG (exp, 2);
38766 op0 = expand_normal (arg0);
38767 op1 = expand_normal (arg1);
38768 op2 = expand_normal (arg2);
38769 mode0 = insn_data[icode].operand[0].mode;
38770 mode1 = insn_data[icode].operand[1].mode;
38771 mode2 = insn_data[icode].operand[2].mode;
38773 op0 = ix86_zero_extend_to_Pmode (op0);
38774 op0 = gen_rtx_MEM (mode1, op0);
38776 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38777 op0 = copy_to_mode_reg (mode0, op0);
38778 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38779 op1 = copy_to_mode_reg (mode1, op1);
38780 if (!insn_data[icode].operand[2].predicate (op2, mode2))
38781 op2 = copy_to_mode_reg (mode2, op2);
38782 pat = GEN_FCN (icode) (op0, op1, op2);
38788 case IX86_BUILTIN_LDMXCSR:
38789 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
38790 target = assign_386_stack_local (SImode, SLOT_TEMP);
38791 emit_move_insn (target, op0);
38792 emit_insn (gen_sse_ldmxcsr (target));
38795 case IX86_BUILTIN_STMXCSR:
38796 target = assign_386_stack_local (SImode, SLOT_TEMP);
38797 emit_insn (gen_sse_stmxcsr (target));
38798 return copy_to_mode_reg (SImode, target);
38800 case IX86_BUILTIN_CLFLUSH:
38801 arg0 = CALL_EXPR_ARG (exp, 0);
38802 op0 = expand_normal (arg0);
38803 icode = CODE_FOR_sse2_clflush;
38804 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38805 op0 = ix86_zero_extend_to_Pmode (op0);
38807 emit_insn (gen_sse2_clflush (op0));
38810 case IX86_BUILTIN_CLWB:
38811 arg0 = CALL_EXPR_ARG (exp, 0);
38812 op0 = expand_normal (arg0);
38813 icode = CODE_FOR_clwb;
38814 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38815 op0 = ix86_zero_extend_to_Pmode (op0);
38817 emit_insn (gen_clwb (op0));
38820 case IX86_BUILTIN_CLFLUSHOPT:
38821 arg0 = CALL_EXPR_ARG (exp, 0);
38822 op0 = expand_normal (arg0);
38823 icode = CODE_FOR_clflushopt;
38824 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38825 op0 = ix86_zero_extend_to_Pmode (op0);
38827 emit_insn (gen_clflushopt (op0));
38830 case IX86_BUILTIN_MONITOR:
38831 arg0 = CALL_EXPR_ARG (exp, 0);
38832 arg1 = CALL_EXPR_ARG (exp, 1);
38833 arg2 = CALL_EXPR_ARG (exp, 2);
38834 op0 = expand_normal (arg0);
38835 op1 = expand_normal (arg1);
38836 op2 = expand_normal (arg2);
38838 op0 = ix86_zero_extend_to_Pmode (op0);
38840 op1 = copy_to_mode_reg (SImode, op1);
38842 op2 = copy_to_mode_reg (SImode, op2);
38843 emit_insn (ix86_gen_monitor (op0, op1, op2));
38846 case IX86_BUILTIN_MWAIT:
38847 arg0 = CALL_EXPR_ARG (exp, 0);
38848 arg1 = CALL_EXPR_ARG (exp, 1);
38849 op0 = expand_normal (arg0);
38850 op1 = expand_normal (arg1);
38852 op0 = copy_to_mode_reg (SImode, op0);
38854 op1 = copy_to_mode_reg (SImode, op1);
38855 emit_insn (gen_sse3_mwait (op0, op1));
38858 case IX86_BUILTIN_VEC_INIT_V2SI:
38859 case IX86_BUILTIN_VEC_INIT_V4HI:
38860 case IX86_BUILTIN_VEC_INIT_V8QI:
38861 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
38863 case IX86_BUILTIN_VEC_EXT_V2DF:
38864 case IX86_BUILTIN_VEC_EXT_V2DI:
38865 case IX86_BUILTIN_VEC_EXT_V4SF:
38866 case IX86_BUILTIN_VEC_EXT_V4SI:
38867 case IX86_BUILTIN_VEC_EXT_V8HI:
38868 case IX86_BUILTIN_VEC_EXT_V2SI:
38869 case IX86_BUILTIN_VEC_EXT_V4HI:
38870 case IX86_BUILTIN_VEC_EXT_V16QI:
38871 return ix86_expand_vec_ext_builtin (exp, target);
38873 case IX86_BUILTIN_VEC_SET_V2DI:
38874 case IX86_BUILTIN_VEC_SET_V4SF:
38875 case IX86_BUILTIN_VEC_SET_V4SI:
38876 case IX86_BUILTIN_VEC_SET_V8HI:
38877 case IX86_BUILTIN_VEC_SET_V4HI:
38878 case IX86_BUILTIN_VEC_SET_V16QI:
38879 return ix86_expand_vec_set_builtin (exp);
38881 case IX86_BUILTIN_INFQ:
38882 case IX86_BUILTIN_HUGE_VALQ:
38884 REAL_VALUE_TYPE inf;
38888 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
38890 tmp = validize_mem (force_const_mem (mode, tmp));
38893 target = gen_reg_rtx (mode);
38895 emit_move_insn (target, tmp);
38899 case IX86_BUILTIN_RDPMC:
38900 case IX86_BUILTIN_RDTSC:
38901 case IX86_BUILTIN_RDTSCP:
38903 op0 = gen_reg_rtx (DImode);
38904 op1 = gen_reg_rtx (DImode);
38906 if (fcode == IX86_BUILTIN_RDPMC)
38908 arg0 = CALL_EXPR_ARG (exp, 0);
38909 op2 = expand_normal (arg0);
38910 if (!register_operand (op2, SImode))
38911 op2 = copy_to_mode_reg (SImode, op2);
38913 insn = (TARGET_64BIT
38914 ? gen_rdpmc_rex64 (op0, op1, op2)
38915 : gen_rdpmc (op0, op2));
38918 else if (fcode == IX86_BUILTIN_RDTSC)
38920 insn = (TARGET_64BIT
38921 ? gen_rdtsc_rex64 (op0, op1)
38922 : gen_rdtsc (op0));
38927 op2 = gen_reg_rtx (SImode);
38929 insn = (TARGET_64BIT
38930 ? gen_rdtscp_rex64 (op0, op1, op2)
38931 : gen_rdtscp (op0, op2));
38934 arg0 = CALL_EXPR_ARG (exp, 0);
38935 op4 = expand_normal (arg0);
38936 if (!address_operand (op4, VOIDmode))
38938 op4 = convert_memory_address (Pmode, op4);
38939 op4 = copy_addr_to_reg (op4);
38941 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
38946 /* mode is VOIDmode if __builtin_rd* has been called
38948 if (mode == VOIDmode)
38950 target = gen_reg_rtx (mode);
38955 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
38956 op1, 1, OPTAB_DIRECT);
38957 op0 = expand_simple_binop (DImode, IOR, op0, op1,
38958 op0, 1, OPTAB_DIRECT);
38961 emit_move_insn (target, op0);
38964 case IX86_BUILTIN_FXSAVE:
38965 case IX86_BUILTIN_FXRSTOR:
38966 case IX86_BUILTIN_FXSAVE64:
38967 case IX86_BUILTIN_FXRSTOR64:
38968 case IX86_BUILTIN_FNSTENV:
38969 case IX86_BUILTIN_FLDENV:
38973 case IX86_BUILTIN_FXSAVE:
38974 icode = CODE_FOR_fxsave;
38976 case IX86_BUILTIN_FXRSTOR:
38977 icode = CODE_FOR_fxrstor;
38979 case IX86_BUILTIN_FXSAVE64:
38980 icode = CODE_FOR_fxsave64;
38982 case IX86_BUILTIN_FXRSTOR64:
38983 icode = CODE_FOR_fxrstor64;
38985 case IX86_BUILTIN_FNSTENV:
38986 icode = CODE_FOR_fnstenv;
38988 case IX86_BUILTIN_FLDENV:
38989 icode = CODE_FOR_fldenv;
38992 gcc_unreachable ();
38995 arg0 = CALL_EXPR_ARG (exp, 0);
38996 op0 = expand_normal (arg0);
38998 if (!address_operand (op0, VOIDmode))
39000 op0 = convert_memory_address (Pmode, op0);
39001 op0 = copy_addr_to_reg (op0);
39003 op0 = gen_rtx_MEM (mode0, op0);
39005 pat = GEN_FCN (icode) (op0);
39010 case IX86_BUILTIN_XSAVE:
39011 case IX86_BUILTIN_XRSTOR:
39012 case IX86_BUILTIN_XSAVE64:
39013 case IX86_BUILTIN_XRSTOR64:
39014 case IX86_BUILTIN_XSAVEOPT:
39015 case IX86_BUILTIN_XSAVEOPT64:
39016 case IX86_BUILTIN_XSAVES:
39017 case IX86_BUILTIN_XRSTORS:
39018 case IX86_BUILTIN_XSAVES64:
39019 case IX86_BUILTIN_XRSTORS64:
39020 case IX86_BUILTIN_XSAVEC:
39021 case IX86_BUILTIN_XSAVEC64:
39022 arg0 = CALL_EXPR_ARG (exp, 0);
39023 arg1 = CALL_EXPR_ARG (exp, 1);
39024 op0 = expand_normal (arg0);
39025 op1 = expand_normal (arg1);
39027 if (!address_operand (op0, VOIDmode))
39029 op0 = convert_memory_address (Pmode, op0);
39030 op0 = copy_addr_to_reg (op0);
39032 op0 = gen_rtx_MEM (BLKmode, op0);
39034 op1 = force_reg (DImode, op1);
39038 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
39039 NULL, 1, OPTAB_DIRECT);
39042 case IX86_BUILTIN_XSAVE:
39043 icode = CODE_FOR_xsave_rex64;
39045 case IX86_BUILTIN_XRSTOR:
39046 icode = CODE_FOR_xrstor_rex64;
39048 case IX86_BUILTIN_XSAVE64:
39049 icode = CODE_FOR_xsave64;
39051 case IX86_BUILTIN_XRSTOR64:
39052 icode = CODE_FOR_xrstor64;
39054 case IX86_BUILTIN_XSAVEOPT:
39055 icode = CODE_FOR_xsaveopt_rex64;
39057 case IX86_BUILTIN_XSAVEOPT64:
39058 icode = CODE_FOR_xsaveopt64;
39060 case IX86_BUILTIN_XSAVES:
39061 icode = CODE_FOR_xsaves_rex64;
39063 case IX86_BUILTIN_XRSTORS:
39064 icode = CODE_FOR_xrstors_rex64;
39066 case IX86_BUILTIN_XSAVES64:
39067 icode = CODE_FOR_xsaves64;
39069 case IX86_BUILTIN_XRSTORS64:
39070 icode = CODE_FOR_xrstors64;
39072 case IX86_BUILTIN_XSAVEC:
39073 icode = CODE_FOR_xsavec_rex64;
39075 case IX86_BUILTIN_XSAVEC64:
39076 icode = CODE_FOR_xsavec64;
39079 gcc_unreachable ();
39082 op2 = gen_lowpart (SImode, op2);
39083 op1 = gen_lowpart (SImode, op1);
39084 pat = GEN_FCN (icode) (op0, op1, op2);
39090 case IX86_BUILTIN_XSAVE:
39091 icode = CODE_FOR_xsave;
39093 case IX86_BUILTIN_XRSTOR:
39094 icode = CODE_FOR_xrstor;
39096 case IX86_BUILTIN_XSAVEOPT:
39097 icode = CODE_FOR_xsaveopt;
39099 case IX86_BUILTIN_XSAVES:
39100 icode = CODE_FOR_xsaves;
39102 case IX86_BUILTIN_XRSTORS:
39103 icode = CODE_FOR_xrstors;
39105 case IX86_BUILTIN_XSAVEC:
39106 icode = CODE_FOR_xsavec;
39109 gcc_unreachable ();
39111 pat = GEN_FCN (icode) (op0, op1);
39118 case IX86_BUILTIN_LLWPCB:
39119 arg0 = CALL_EXPR_ARG (exp, 0);
39120 op0 = expand_normal (arg0);
39121 icode = CODE_FOR_lwp_llwpcb;
39122 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39123 op0 = ix86_zero_extend_to_Pmode (op0);
39124 emit_insn (gen_lwp_llwpcb (op0));
39127 case IX86_BUILTIN_SLWPCB:
39128 icode = CODE_FOR_lwp_slwpcb;
39130 || !insn_data[icode].operand[0].predicate (target, Pmode))
39131 target = gen_reg_rtx (Pmode);
39132 emit_insn (gen_lwp_slwpcb (target));
39135 case IX86_BUILTIN_BEXTRI32:
39136 case IX86_BUILTIN_BEXTRI64:
39137 arg0 = CALL_EXPR_ARG (exp, 0);
39138 arg1 = CALL_EXPR_ARG (exp, 1);
39139 op0 = expand_normal (arg0);
39140 op1 = expand_normal (arg1);
39141 icode = (fcode == IX86_BUILTIN_BEXTRI32
39142 ? CODE_FOR_tbm_bextri_si
39143 : CODE_FOR_tbm_bextri_di);
39144 if (!CONST_INT_P (op1))
39146 error ("last argument must be an immediate");
39151 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
39152 unsigned char lsb_index = INTVAL (op1) & 0xFF;
39153 op1 = GEN_INT (length);
39154 op2 = GEN_INT (lsb_index);
39155 pat = GEN_FCN (icode) (target, op0, op1, op2);
39161 case IX86_BUILTIN_RDRAND16_STEP:
39162 icode = CODE_FOR_rdrandhi_1;
39166 case IX86_BUILTIN_RDRAND32_STEP:
39167 icode = CODE_FOR_rdrandsi_1;
39171 case IX86_BUILTIN_RDRAND64_STEP:
39172 icode = CODE_FOR_rdranddi_1;
39176 op0 = gen_reg_rtx (mode0);
39177 emit_insn (GEN_FCN (icode) (op0));
39179 arg0 = CALL_EXPR_ARG (exp, 0);
39180 op1 = expand_normal (arg0);
39181 if (!address_operand (op1, VOIDmode))
39183 op1 = convert_memory_address (Pmode, op1);
39184 op1 = copy_addr_to_reg (op1);
39186 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39188 op1 = gen_reg_rtx (SImode);
39189 emit_move_insn (op1, CONST1_RTX (SImode));
39191 /* Emit SImode conditional move. */
39192 if (mode0 == HImode)
39194 op2 = gen_reg_rtx (SImode);
39195 emit_insn (gen_zero_extendhisi2 (op2, op0));
39197 else if (mode0 == SImode)
39200 op2 = gen_rtx_SUBREG (SImode, op0, 0);
39203 || !register_operand (target, SImode))
39204 target = gen_reg_rtx (SImode);
39206 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
39208 emit_insn (gen_rtx_SET (VOIDmode, target,
39209 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
39212 case IX86_BUILTIN_RDSEED16_STEP:
39213 icode = CODE_FOR_rdseedhi_1;
39217 case IX86_BUILTIN_RDSEED32_STEP:
39218 icode = CODE_FOR_rdseedsi_1;
39222 case IX86_BUILTIN_RDSEED64_STEP:
39223 icode = CODE_FOR_rdseeddi_1;
39227 op0 = gen_reg_rtx (mode0);
39228 emit_insn (GEN_FCN (icode) (op0));
39230 arg0 = CALL_EXPR_ARG (exp, 0);
39231 op1 = expand_normal (arg0);
39232 if (!address_operand (op1, VOIDmode))
39234 op1 = convert_memory_address (Pmode, op1);
39235 op1 = copy_addr_to_reg (op1);
39237 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39239 op2 = gen_reg_rtx (QImode);
39241 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
39243 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
39246 || !register_operand (target, SImode))
39247 target = gen_reg_rtx (SImode);
39249 emit_insn (gen_zero_extendqisi2 (target, op2));
39252 case IX86_BUILTIN_SBB32:
39253 icode = CODE_FOR_subsi3_carry;
39257 case IX86_BUILTIN_SBB64:
39258 icode = CODE_FOR_subdi3_carry;
39262 case IX86_BUILTIN_ADDCARRYX32:
39263 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
39267 case IX86_BUILTIN_ADDCARRYX64:
39268 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
39272 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
39273 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
39274 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
39275 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
39277 op0 = gen_reg_rtx (QImode);
39279 /* Generate CF from input operand. */
39280 op1 = expand_normal (arg0);
39281 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
39282 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
39284 /* Gen ADCX instruction to compute X+Y+CF. */
39285 op2 = expand_normal (arg1);
39286 op3 = expand_normal (arg2);
39289 op2 = copy_to_mode_reg (mode0, op2);
39291 op3 = copy_to_mode_reg (mode0, op3);
39293 op0 = gen_reg_rtx (mode0);
39295 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
39296 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
39297 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
39299 /* Store the result. */
39300 op4 = expand_normal (arg3);
39301 if (!address_operand (op4, VOIDmode))
39303 op4 = convert_memory_address (Pmode, op4);
39304 op4 = copy_addr_to_reg (op4);
39306 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39308 /* Return current CF value. */
39310 target = gen_reg_rtx (QImode);
39312 PUT_MODE (pat, QImode);
39313 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
39316 case IX86_BUILTIN_READ_FLAGS:
39317 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39320 || target == NULL_RTX
39321 || !nonimmediate_operand (target, word_mode)
39322 || GET_MODE (target) != word_mode)
39323 target = gen_reg_rtx (word_mode);
39325 emit_insn (gen_pop (target));
39328 case IX86_BUILTIN_WRITE_FLAGS:
39330 arg0 = CALL_EXPR_ARG (exp, 0);
39331 op0 = expand_normal (arg0);
39332 if (!general_no_elim_operand (op0, word_mode))
39333 op0 = copy_to_mode_reg (word_mode, op0);
39335 emit_insn (gen_push (op0));
39336 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39339 case IX86_BUILTIN_KORTESTC16:
39340 icode = CODE_FOR_kortestchi;
39345 case IX86_BUILTIN_KORTESTZ16:
39346 icode = CODE_FOR_kortestzhi;
39351 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
39352 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
39353 op0 = expand_normal (arg0);
39354 op1 = expand_normal (arg1);
39356 op0 = copy_to_reg (op0);
39357 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39358 op1 = copy_to_reg (op1);
39359 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39361 target = gen_reg_rtx (QImode);
39362 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
39364 /* Emit kortest. */
39365 emit_insn (GEN_FCN (icode) (op0, op1));
39366 /* And use setcc to return result from flags. */
39367 ix86_expand_setcc (target, EQ,
39368 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39371 case IX86_BUILTIN_GATHERSIV2DF:
39372 icode = CODE_FOR_avx2_gathersiv2df;
39374 case IX86_BUILTIN_GATHERSIV4DF:
39375 icode = CODE_FOR_avx2_gathersiv4df;
39377 case IX86_BUILTIN_GATHERDIV2DF:
39378 icode = CODE_FOR_avx2_gatherdiv2df;
39380 case IX86_BUILTIN_GATHERDIV4DF:
39381 icode = CODE_FOR_avx2_gatherdiv4df;
39383 case IX86_BUILTIN_GATHERSIV4SF:
39384 icode = CODE_FOR_avx2_gathersiv4sf;
39386 case IX86_BUILTIN_GATHERSIV8SF:
39387 icode = CODE_FOR_avx2_gathersiv8sf;
39389 case IX86_BUILTIN_GATHERDIV4SF:
39390 icode = CODE_FOR_avx2_gatherdiv4sf;
39392 case IX86_BUILTIN_GATHERDIV8SF:
39393 icode = CODE_FOR_avx2_gatherdiv8sf;
39395 case IX86_BUILTIN_GATHERSIV2DI:
39396 icode = CODE_FOR_avx2_gathersiv2di;
39398 case IX86_BUILTIN_GATHERSIV4DI:
39399 icode = CODE_FOR_avx2_gathersiv4di;
39401 case IX86_BUILTIN_GATHERDIV2DI:
39402 icode = CODE_FOR_avx2_gatherdiv2di;
39404 case IX86_BUILTIN_GATHERDIV4DI:
39405 icode = CODE_FOR_avx2_gatherdiv4di;
39407 case IX86_BUILTIN_GATHERSIV4SI:
39408 icode = CODE_FOR_avx2_gathersiv4si;
39410 case IX86_BUILTIN_GATHERSIV8SI:
39411 icode = CODE_FOR_avx2_gathersiv8si;
39413 case IX86_BUILTIN_GATHERDIV4SI:
39414 icode = CODE_FOR_avx2_gatherdiv4si;
39416 case IX86_BUILTIN_GATHERDIV8SI:
39417 icode = CODE_FOR_avx2_gatherdiv8si;
39419 case IX86_BUILTIN_GATHERALTSIV4DF:
39420 icode = CODE_FOR_avx2_gathersiv4df;
39422 case IX86_BUILTIN_GATHERALTDIV8SF:
39423 icode = CODE_FOR_avx2_gatherdiv8sf;
39425 case IX86_BUILTIN_GATHERALTSIV4DI:
39426 icode = CODE_FOR_avx2_gathersiv4di;
39428 case IX86_BUILTIN_GATHERALTDIV8SI:
39429 icode = CODE_FOR_avx2_gatherdiv8si;
39431 case IX86_BUILTIN_GATHER3SIV16SF:
39432 icode = CODE_FOR_avx512f_gathersiv16sf;
39434 case IX86_BUILTIN_GATHER3SIV8DF:
39435 icode = CODE_FOR_avx512f_gathersiv8df;
39437 case IX86_BUILTIN_GATHER3DIV16SF:
39438 icode = CODE_FOR_avx512f_gatherdiv16sf;
39440 case IX86_BUILTIN_GATHER3DIV8DF:
39441 icode = CODE_FOR_avx512f_gatherdiv8df;
39443 case IX86_BUILTIN_GATHER3SIV16SI:
39444 icode = CODE_FOR_avx512f_gathersiv16si;
39446 case IX86_BUILTIN_GATHER3SIV8DI:
39447 icode = CODE_FOR_avx512f_gathersiv8di;
39449 case IX86_BUILTIN_GATHER3DIV16SI:
39450 icode = CODE_FOR_avx512f_gatherdiv16si;
39452 case IX86_BUILTIN_GATHER3DIV8DI:
39453 icode = CODE_FOR_avx512f_gatherdiv8di;
39455 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39456 icode = CODE_FOR_avx512f_gathersiv8df;
39458 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39459 icode = CODE_FOR_avx512f_gatherdiv16sf;
39461 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39462 icode = CODE_FOR_avx512f_gathersiv8di;
39464 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39465 icode = CODE_FOR_avx512f_gatherdiv16si;
39467 case IX86_BUILTIN_GATHER3SIV2DF:
39468 icode = CODE_FOR_avx512vl_gathersiv2df;
39470 case IX86_BUILTIN_GATHER3SIV4DF:
39471 icode = CODE_FOR_avx512vl_gathersiv4df;
39473 case IX86_BUILTIN_GATHER3DIV2DF:
39474 icode = CODE_FOR_avx512vl_gatherdiv2df;
39476 case IX86_BUILTIN_GATHER3DIV4DF:
39477 icode = CODE_FOR_avx512vl_gatherdiv4df;
39479 case IX86_BUILTIN_GATHER3SIV4SF:
39480 icode = CODE_FOR_avx512vl_gathersiv4sf;
39482 case IX86_BUILTIN_GATHER3SIV8SF:
39483 icode = CODE_FOR_avx512vl_gathersiv8sf;
39485 case IX86_BUILTIN_GATHER3DIV4SF:
39486 icode = CODE_FOR_avx512vl_gatherdiv4sf;
39488 case IX86_BUILTIN_GATHER3DIV8SF:
39489 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39491 case IX86_BUILTIN_GATHER3SIV2DI:
39492 icode = CODE_FOR_avx512vl_gathersiv2di;
39494 case IX86_BUILTIN_GATHER3SIV4DI:
39495 icode = CODE_FOR_avx512vl_gathersiv4di;
39497 case IX86_BUILTIN_GATHER3DIV2DI:
39498 icode = CODE_FOR_avx512vl_gatherdiv2di;
39500 case IX86_BUILTIN_GATHER3DIV4DI:
39501 icode = CODE_FOR_avx512vl_gatherdiv4di;
39503 case IX86_BUILTIN_GATHER3SIV4SI:
39504 icode = CODE_FOR_avx512vl_gathersiv4si;
39506 case IX86_BUILTIN_GATHER3SIV8SI:
39507 icode = CODE_FOR_avx512vl_gathersiv8si;
39509 case IX86_BUILTIN_GATHER3DIV4SI:
39510 icode = CODE_FOR_avx512vl_gatherdiv4si;
39512 case IX86_BUILTIN_GATHER3DIV8SI:
39513 icode = CODE_FOR_avx512vl_gatherdiv8si;
39515 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39516 icode = CODE_FOR_avx512vl_gathersiv4df;
39518 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39519 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39521 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39522 icode = CODE_FOR_avx512vl_gathersiv4di;
39524 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39525 icode = CODE_FOR_avx512vl_gatherdiv8si;
39527 case IX86_BUILTIN_SCATTERSIV16SF:
39528 icode = CODE_FOR_avx512f_scattersiv16sf;
39530 case IX86_BUILTIN_SCATTERSIV8DF:
39531 icode = CODE_FOR_avx512f_scattersiv8df;
39533 case IX86_BUILTIN_SCATTERDIV16SF:
39534 icode = CODE_FOR_avx512f_scatterdiv16sf;
39536 case IX86_BUILTIN_SCATTERDIV8DF:
39537 icode = CODE_FOR_avx512f_scatterdiv8df;
39539 case IX86_BUILTIN_SCATTERSIV16SI:
39540 icode = CODE_FOR_avx512f_scattersiv16si;
39542 case IX86_BUILTIN_SCATTERSIV8DI:
39543 icode = CODE_FOR_avx512f_scattersiv8di;
39545 case IX86_BUILTIN_SCATTERDIV16SI:
39546 icode = CODE_FOR_avx512f_scatterdiv16si;
39548 case IX86_BUILTIN_SCATTERDIV8DI:
39549 icode = CODE_FOR_avx512f_scatterdiv8di;
39551 case IX86_BUILTIN_SCATTERSIV8SF:
39552 icode = CODE_FOR_avx512vl_scattersiv8sf;
39554 case IX86_BUILTIN_SCATTERSIV4SF:
39555 icode = CODE_FOR_avx512vl_scattersiv4sf;
39557 case IX86_BUILTIN_SCATTERSIV4DF:
39558 icode = CODE_FOR_avx512vl_scattersiv4df;
39560 case IX86_BUILTIN_SCATTERSIV2DF:
39561 icode = CODE_FOR_avx512vl_scattersiv2df;
39563 case IX86_BUILTIN_SCATTERDIV8SF:
39564 icode = CODE_FOR_avx512vl_scatterdiv8sf;
39566 case IX86_BUILTIN_SCATTERDIV4SF:
39567 icode = CODE_FOR_avx512vl_scatterdiv4sf;
39569 case IX86_BUILTIN_SCATTERDIV4DF:
39570 icode = CODE_FOR_avx512vl_scatterdiv4df;
39572 case IX86_BUILTIN_SCATTERDIV2DF:
39573 icode = CODE_FOR_avx512vl_scatterdiv2df;
39575 case IX86_BUILTIN_SCATTERSIV8SI:
39576 icode = CODE_FOR_avx512vl_scattersiv8si;
39578 case IX86_BUILTIN_SCATTERSIV4SI:
39579 icode = CODE_FOR_avx512vl_scattersiv4si;
39581 case IX86_BUILTIN_SCATTERSIV4DI:
39582 icode = CODE_FOR_avx512vl_scattersiv4di;
39584 case IX86_BUILTIN_SCATTERSIV2DI:
39585 icode = CODE_FOR_avx512vl_scattersiv2di;
39587 case IX86_BUILTIN_SCATTERDIV8SI:
39588 icode = CODE_FOR_avx512vl_scatterdiv8si;
39590 case IX86_BUILTIN_SCATTERDIV4SI:
39591 icode = CODE_FOR_avx512vl_scatterdiv4si;
39593 case IX86_BUILTIN_SCATTERDIV4DI:
39594 icode = CODE_FOR_avx512vl_scatterdiv4di;
39596 case IX86_BUILTIN_SCATTERDIV2DI:
39597 icode = CODE_FOR_avx512vl_scatterdiv2di;
39599 case IX86_BUILTIN_GATHERPFDPD:
39600 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39601 goto vec_prefetch_gen;
39602 case IX86_BUILTIN_GATHERPFDPS:
39603 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39604 goto vec_prefetch_gen;
39605 case IX86_BUILTIN_GATHERPFQPD:
39606 icode = CODE_FOR_avx512pf_gatherpfv8didf;
39607 goto vec_prefetch_gen;
39608 case IX86_BUILTIN_GATHERPFQPS:
39609 icode = CODE_FOR_avx512pf_gatherpfv8disf;
39610 goto vec_prefetch_gen;
39611 case IX86_BUILTIN_SCATTERPFDPD:
39612 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39613 goto vec_prefetch_gen;
39614 case IX86_BUILTIN_SCATTERPFDPS:
39615 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39616 goto vec_prefetch_gen;
39617 case IX86_BUILTIN_SCATTERPFQPD:
39618 icode = CODE_FOR_avx512pf_scatterpfv8didf;
39619 goto vec_prefetch_gen;
39620 case IX86_BUILTIN_SCATTERPFQPS:
39621 icode = CODE_FOR_avx512pf_scatterpfv8disf;
39622 goto vec_prefetch_gen;
39626 rtx (*gen) (rtx, rtx);
39628 arg0 = CALL_EXPR_ARG (exp, 0);
39629 arg1 = CALL_EXPR_ARG (exp, 1);
39630 arg2 = CALL_EXPR_ARG (exp, 2);
39631 arg3 = CALL_EXPR_ARG (exp, 3);
39632 arg4 = CALL_EXPR_ARG (exp, 4);
39633 op0 = expand_normal (arg0);
39634 op1 = expand_normal (arg1);
39635 op2 = expand_normal (arg2);
39636 op3 = expand_normal (arg3);
39637 op4 = expand_normal (arg4);
39638 /* Note the arg order is different from the operand order. */
39639 mode0 = insn_data[icode].operand[1].mode;
39640 mode2 = insn_data[icode].operand[3].mode;
39641 mode3 = insn_data[icode].operand[4].mode;
39642 mode4 = insn_data[icode].operand[5].mode;
39644 if (target == NULL_RTX
39645 || GET_MODE (target) != insn_data[icode].operand[0].mode
39646 || !insn_data[icode].operand[0].predicate (target,
39647 GET_MODE (target)))
39648 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
39650 subtarget = target;
39654 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39655 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39656 half = gen_reg_rtx (V8SImode);
39657 if (!nonimmediate_operand (op2, V16SImode))
39658 op2 = copy_to_mode_reg (V16SImode, op2);
39659 emit_insn (gen_vec_extract_lo_v16si (half, op2));
39662 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39663 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39664 case IX86_BUILTIN_GATHERALTSIV4DF:
39665 case IX86_BUILTIN_GATHERALTSIV4DI:
39666 half = gen_reg_rtx (V4SImode);
39667 if (!nonimmediate_operand (op2, V8SImode))
39668 op2 = copy_to_mode_reg (V8SImode, op2);
39669 emit_insn (gen_vec_extract_lo_v8si (half, op2));
39672 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39673 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39674 half = gen_reg_rtx (mode0);
39675 if (mode0 == V8SFmode)
39676 gen = gen_vec_extract_lo_v16sf;
39678 gen = gen_vec_extract_lo_v16si;
39679 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39680 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39681 emit_insn (gen (half, op0));
39683 if (GET_MODE (op3) != VOIDmode)
39685 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39686 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39687 emit_insn (gen (half, op3));
39691 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39692 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39693 case IX86_BUILTIN_GATHERALTDIV8SF:
39694 case IX86_BUILTIN_GATHERALTDIV8SI:
39695 half = gen_reg_rtx (mode0);
39696 if (mode0 == V4SFmode)
39697 gen = gen_vec_extract_lo_v8sf;
39699 gen = gen_vec_extract_lo_v8si;
39700 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39701 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39702 emit_insn (gen (half, op0));
39704 if (GET_MODE (op3) != VOIDmode)
39706 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39707 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39708 emit_insn (gen (half, op3));
39716 /* Force memory operand only with base register here. But we
39717 don't want to do it on memory operand for other builtin
39719 op1 = ix86_zero_extend_to_Pmode (op1);
39721 if (!insn_data[icode].operand[1].predicate (op0, mode0))
39722 op0 = copy_to_mode_reg (mode0, op0);
39723 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
39724 op1 = copy_to_mode_reg (Pmode, op1);
39725 if (!insn_data[icode].operand[3].predicate (op2, mode2))
39726 op2 = copy_to_mode_reg (mode2, op2);
39727 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
39729 if (!insn_data[icode].operand[4].predicate (op3, mode3))
39730 op3 = copy_to_mode_reg (mode3, op3);
39734 op3 = copy_to_reg (op3);
39735 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
39737 if (!insn_data[icode].operand[5].predicate (op4, mode4))
39739 error ("the last argument must be scale 1, 2, 4, 8");
39743 /* Optimize. If mask is known to have all high bits set,
39744 replace op0 with pc_rtx to signal that the instruction
39745 overwrites the whole destination and doesn't use its
39746 previous contents. */
39749 if (TREE_CODE (arg3) == INTEGER_CST)
39751 if (integer_all_onesp (arg3))
39754 else if (TREE_CODE (arg3) == VECTOR_CST)
39756 unsigned int negative = 0;
39757 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
39759 tree cst = VECTOR_CST_ELT (arg3, i);
39760 if (TREE_CODE (cst) == INTEGER_CST
39761 && tree_int_cst_sign_bit (cst))
39763 else if (TREE_CODE (cst) == REAL_CST
39764 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
39767 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
39770 else if (TREE_CODE (arg3) == SSA_NAME
39771 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
39773 /* Recognize also when mask is like:
39774 __v2df src = _mm_setzero_pd ();
39775 __v2df mask = _mm_cmpeq_pd (src, src);
39777 __v8sf src = _mm256_setzero_ps ();
39778 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
39779 as that is a cheaper way to load all ones into
39780 a register than having to load a constant from
39782 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
39783 if (is_gimple_call (def_stmt))
39785 tree fndecl = gimple_call_fndecl (def_stmt);
39787 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
39788 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
39790 case IX86_BUILTIN_CMPPD:
39791 case IX86_BUILTIN_CMPPS:
39792 case IX86_BUILTIN_CMPPD256:
39793 case IX86_BUILTIN_CMPPS256:
39794 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
39797 case IX86_BUILTIN_CMPEQPD:
39798 case IX86_BUILTIN_CMPEQPS:
39799 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
39800 && initializer_zerop (gimple_call_arg (def_stmt,
39811 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
39818 case IX86_BUILTIN_GATHER3DIV16SF:
39819 if (target == NULL_RTX)
39820 target = gen_reg_rtx (V8SFmode);
39821 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
39823 case IX86_BUILTIN_GATHER3DIV16SI:
39824 if (target == NULL_RTX)
39825 target = gen_reg_rtx (V8SImode);
39826 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
39828 case IX86_BUILTIN_GATHER3DIV8SF:
39829 case IX86_BUILTIN_GATHERDIV8SF:
39830 if (target == NULL_RTX)
39831 target = gen_reg_rtx (V4SFmode);
39832 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
39834 case IX86_BUILTIN_GATHER3DIV8SI:
39835 case IX86_BUILTIN_GATHERDIV8SI:
39836 if (target == NULL_RTX)
39837 target = gen_reg_rtx (V4SImode);
39838 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
39841 target = subtarget;
39847 arg0 = CALL_EXPR_ARG (exp, 0);
39848 arg1 = CALL_EXPR_ARG (exp, 1);
39849 arg2 = CALL_EXPR_ARG (exp, 2);
39850 arg3 = CALL_EXPR_ARG (exp, 3);
39851 arg4 = CALL_EXPR_ARG (exp, 4);
39852 op0 = expand_normal (arg0);
39853 op1 = expand_normal (arg1);
39854 op2 = expand_normal (arg2);
39855 op3 = expand_normal (arg3);
39856 op4 = expand_normal (arg4);
39857 mode1 = insn_data[icode].operand[1].mode;
39858 mode2 = insn_data[icode].operand[2].mode;
39859 mode3 = insn_data[icode].operand[3].mode;
39860 mode4 = insn_data[icode].operand[4].mode;
39862 /* Force memory operand only with base register here. But we
39863 don't want to do it on memory operand for other builtin
39865 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
39867 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39868 op0 = copy_to_mode_reg (Pmode, op0);
39870 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
39872 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39873 op1 = copy_to_mode_reg (mode1, op1);
39877 op1 = copy_to_reg (op1);
39878 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
39881 if (!insn_data[icode].operand[2].predicate (op2, mode2))
39882 op2 = copy_to_mode_reg (mode2, op2);
39884 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39885 op3 = copy_to_mode_reg (mode3, op3);
39887 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39889 error ("the last argument must be scale 1, 2, 4, 8");
39893 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
39901 arg0 = CALL_EXPR_ARG (exp, 0);
39902 arg1 = CALL_EXPR_ARG (exp, 1);
39903 arg2 = CALL_EXPR_ARG (exp, 2);
39904 arg3 = CALL_EXPR_ARG (exp, 3);
39905 arg4 = CALL_EXPR_ARG (exp, 4);
39906 op0 = expand_normal (arg0);
39907 op1 = expand_normal (arg1);
39908 op2 = expand_normal (arg2);
39909 op3 = expand_normal (arg3);
39910 op4 = expand_normal (arg4);
39911 mode0 = insn_data[icode].operand[0].mode;
39912 mode1 = insn_data[icode].operand[1].mode;
39913 mode3 = insn_data[icode].operand[3].mode;
39914 mode4 = insn_data[icode].operand[4].mode;
39916 if (GET_MODE (op0) == mode0
39917 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
39919 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39920 op0 = copy_to_mode_reg (mode0, op0);
39922 else if (op0 != constm1_rtx)
39924 op0 = copy_to_reg (op0);
39925 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39928 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39929 op1 = copy_to_mode_reg (mode1, op1);
39931 /* Force memory operand only with base register here. But we
39932 don't want to do it on memory operand for other builtin
39934 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
39936 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
39937 op2 = copy_to_mode_reg (Pmode, op2);
39939 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39941 error ("the forth argument must be scale 1, 2, 4, 8");
39945 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39947 error ("incorrect hint operand");
39951 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
39959 case IX86_BUILTIN_XABORT:
39960 icode = CODE_FOR_xabort;
39961 arg0 = CALL_EXPR_ARG (exp, 0);
39962 op0 = expand_normal (arg0);
39963 mode0 = insn_data[icode].operand[0].mode;
39964 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39966 error ("the xabort's argument must be an 8-bit immediate");
39969 emit_insn (gen_xabort (op0));
39976 for (i = 0, d = bdesc_special_args;
39977 i < ARRAY_SIZE (bdesc_special_args);
39979 if (d->code == fcode)
39980 return ix86_expand_special_args_builtin (d, exp, target);
39982 for (i = 0, d = bdesc_args;
39983 i < ARRAY_SIZE (bdesc_args);
39985 if (d->code == fcode)
39988 case IX86_BUILTIN_FABSQ:
39989 case IX86_BUILTIN_COPYSIGNQ:
39991 /* Emit a normal call if SSE isn't available. */
39992 return expand_call (exp, target, ignore);
39994 return ix86_expand_args_builtin (d, exp, target);
39997 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
39998 if (d->code == fcode)
39999 return ix86_expand_sse_comi (d, exp, target);
40001 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
40002 if (d->code == fcode)
40003 return ix86_expand_round_builtin (d, exp, target);
40005 for (i = 0, d = bdesc_pcmpestr;
40006 i < ARRAY_SIZE (bdesc_pcmpestr);
40008 if (d->code == fcode)
40009 return ix86_expand_sse_pcmpestr (d, exp, target);
40011 for (i = 0, d = bdesc_pcmpistr;
40012 i < ARRAY_SIZE (bdesc_pcmpistr);
40014 if (d->code == fcode)
40015 return ix86_expand_sse_pcmpistr (d, exp, target);
40017 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
40018 if (d->code == fcode)
40019 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
40020 (enum ix86_builtin_func_type)
40021 d->flag, d->comparison);
40023 gcc_unreachable ();
40026 /* This returns the target-specific builtin with code CODE if
40027 current_function_decl has visibility on this builtin, which is checked
40028 using isa flags. Returns NULL_TREE otherwise. */
40030 static tree ix86_get_builtin (enum ix86_builtins code)
40032 struct cl_target_option *opts;
40033 tree target_tree = NULL_TREE;
40035 /* Determine the isa flags of current_function_decl. */
40037 if (current_function_decl)
40038 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
40040 if (target_tree == NULL)
40041 target_tree = target_option_default_node;
40043 opts = TREE_TARGET_OPTION (target_tree);
40045 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
40046 return ix86_builtin_decl (code, true);
40051 /* Return function decl for target specific builtin
40052 for given MPX builtin passed i FCODE. */
40054 ix86_builtin_mpx_function (unsigned fcode)
40058 case BUILT_IN_CHKP_BNDMK:
40059 return ix86_builtins[IX86_BUILTIN_BNDMK];
40061 case BUILT_IN_CHKP_BNDSTX:
40062 return ix86_builtins[IX86_BUILTIN_BNDSTX];
40064 case BUILT_IN_CHKP_BNDLDX:
40065 return ix86_builtins[IX86_BUILTIN_BNDLDX];
40067 case BUILT_IN_CHKP_BNDCL:
40068 return ix86_builtins[IX86_BUILTIN_BNDCL];
40070 case BUILT_IN_CHKP_BNDCU:
40071 return ix86_builtins[IX86_BUILTIN_BNDCU];
40073 case BUILT_IN_CHKP_BNDRET:
40074 return ix86_builtins[IX86_BUILTIN_BNDRET];
40076 case BUILT_IN_CHKP_INTERSECT:
40077 return ix86_builtins[IX86_BUILTIN_BNDINT];
40079 case BUILT_IN_CHKP_NARROW:
40080 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
40082 case BUILT_IN_CHKP_SIZEOF:
40083 return ix86_builtins[IX86_BUILTIN_SIZEOF];
40085 case BUILT_IN_CHKP_EXTRACT_LOWER:
40086 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
40088 case BUILT_IN_CHKP_EXTRACT_UPPER:
40089 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
40095 gcc_unreachable ();
40098 /* Helper function for ix86_load_bounds and ix86_store_bounds.
40100 Return an address to be used to load/store bounds for pointer
40103 SLOT_NO is an integer constant holding number of a target
40104 dependent special slot to be used in case SLOT is not a memory.
40106 SPECIAL_BASE is a pointer to be used as a base of fake address
40107 to access special slots in Bounds Table. SPECIAL_BASE[-1],
40108 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
40111 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
40115 /* NULL slot means we pass bounds for pointer not passed to the
40116 function at all. Register slot means we pass pointer in a
40117 register. In both these cases bounds are passed via Bounds
40118 Table. Since we do not have actual pointer stored in memory,
40119 we have to use fake addresses to access Bounds Table. We
40120 start with (special_base - sizeof (void*)) and decrease this
40121 address by pointer size to get addresses for other slots. */
40122 if (!slot || REG_P (slot))
40124 gcc_assert (CONST_INT_P (slot_no));
40125 addr = plus_constant (Pmode, special_base,
40126 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
40128 /* If pointer is passed in a memory then its address is used to
40129 access Bounds Table. */
40130 else if (MEM_P (slot))
40132 addr = XEXP (slot, 0);
40133 if (!register_operand (addr, Pmode))
40134 addr = copy_addr_to_reg (addr);
40137 gcc_unreachable ();
40142 /* Expand pass uses this hook to load bounds for function parameter
40143 PTR passed in SLOT in case its bounds are not passed in a register.
40145 If SLOT is a memory, then bounds are loaded as for regular pointer
40146 loaded from memory. PTR may be NULL in case SLOT is a memory.
40147 In such case value of PTR (if required) may be loaded from SLOT.
40149 If SLOT is NULL or a register then SLOT_NO is an integer constant
40150 holding number of the target dependent special slot which should be
40151 used to obtain bounds.
40153 Return loaded bounds. */
40156 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
40158 rtx reg = gen_reg_rtx (BNDmode);
40161 /* Get address to be used to access Bounds Table. Special slots start
40162 at the location of return address of the current function. */
40163 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
40165 /* Load pointer value from a memory if we don't have it. */
40168 gcc_assert (MEM_P (slot));
40169 ptr = copy_addr_to_reg (slot);
40172 emit_insn (BNDmode == BND64mode
40173 ? gen_bnd64_ldx (reg, addr, ptr)
40174 : gen_bnd32_ldx (reg, addr, ptr));
40179 /* Expand pass uses this hook to store BOUNDS for call argument PTR
40180 passed in SLOT in case BOUNDS are not passed in a register.
40182 If SLOT is a memory, then BOUNDS are stored as for regular pointer
40183 stored in memory. PTR may be NULL in case SLOT is a memory.
40184 In such case value of PTR (if required) may be loaded from SLOT.
40186 If SLOT is NULL or a register then SLOT_NO is an integer constant
40187 holding number of the target dependent special slot which should be
40188 used to store BOUNDS. */
40191 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
40195 /* Get address to be used to access Bounds Table. Special slots start
40196 at the location of return address of a called function. */
40197 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
40199 /* Load pointer value from a memory if we don't have it. */
40202 gcc_assert (MEM_P (slot));
40203 ptr = copy_addr_to_reg (slot);
40206 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
40207 if (!register_operand (bounds, BNDmode))
40208 bounds = copy_to_mode_reg (BNDmode, bounds);
40210 emit_insn (BNDmode == BND64mode
40211 ? gen_bnd64_stx (addr, ptr, bounds)
40212 : gen_bnd32_stx (addr, ptr, bounds));
40215 /* Load and return bounds returned by function in SLOT. */
40218 ix86_load_returned_bounds (rtx slot)
40222 gcc_assert (REG_P (slot));
40223 res = gen_reg_rtx (BNDmode);
40224 emit_move_insn (res, slot);
40229 /* Store BOUNDS returned by function into SLOT. */
40232 ix86_store_returned_bounds (rtx slot, rtx bounds)
40234 gcc_assert (REG_P (slot));
40235 emit_move_insn (slot, bounds);
40238 /* Returns a function decl for a vectorized version of the builtin function
40239 with builtin function code FN and the result vector type TYPE, or NULL_TREE
40240 if it is not available. */
40243 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
40246 machine_mode in_mode, out_mode;
40248 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
40250 if (TREE_CODE (type_out) != VECTOR_TYPE
40251 || TREE_CODE (type_in) != VECTOR_TYPE
40252 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
40255 out_mode = TYPE_MODE (TREE_TYPE (type_out));
40256 out_n = TYPE_VECTOR_SUBPARTS (type_out);
40257 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40258 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40262 case BUILT_IN_SQRT:
40263 if (out_mode == DFmode && in_mode == DFmode)
40265 if (out_n == 2 && in_n == 2)
40266 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40267 else if (out_n == 4 && in_n == 4)
40268 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40269 else if (out_n == 8 && in_n == 8)
40270 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40274 case BUILT_IN_EXP2F:
40275 if (out_mode == SFmode && in_mode == SFmode)
40277 if (out_n == 16 && in_n == 16)
40278 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40282 case BUILT_IN_SQRTF:
40283 if (out_mode == SFmode && in_mode == SFmode)
40285 if (out_n == 4 && in_n == 4)
40286 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40287 else if (out_n == 8 && in_n == 8)
40288 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40289 else if (out_n == 16 && in_n == 16)
40290 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
40294 case BUILT_IN_IFLOOR:
40295 case BUILT_IN_LFLOOR:
40296 case BUILT_IN_LLFLOOR:
40297 /* The round insn does not trap on denormals. */
40298 if (flag_trapping_math || !TARGET_ROUND)
40301 if (out_mode == SImode && in_mode == DFmode)
40303 if (out_n == 4 && in_n == 2)
40304 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40305 else if (out_n == 8 && in_n == 4)
40306 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40307 else if (out_n == 16 && in_n == 8)
40308 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40312 case BUILT_IN_IFLOORF:
40313 case BUILT_IN_LFLOORF:
40314 case BUILT_IN_LLFLOORF:
40315 /* The round insn does not trap on denormals. */
40316 if (flag_trapping_math || !TARGET_ROUND)
40319 if (out_mode == SImode && in_mode == SFmode)
40321 if (out_n == 4 && in_n == 4)
40322 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40323 else if (out_n == 8 && in_n == 8)
40324 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40328 case BUILT_IN_ICEIL:
40329 case BUILT_IN_LCEIL:
40330 case BUILT_IN_LLCEIL:
40331 /* The round insn does not trap on denormals. */
40332 if (flag_trapping_math || !TARGET_ROUND)
40335 if (out_mode == SImode && in_mode == DFmode)
40337 if (out_n == 4 && in_n == 2)
40338 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40339 else if (out_n == 8 && in_n == 4)
40340 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40341 else if (out_n == 16 && in_n == 8)
40342 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40346 case BUILT_IN_ICEILF:
40347 case BUILT_IN_LCEILF:
40348 case BUILT_IN_LLCEILF:
40349 /* The round insn does not trap on denormals. */
40350 if (flag_trapping_math || !TARGET_ROUND)
40353 if (out_mode == SImode && in_mode == SFmode)
40355 if (out_n == 4 && in_n == 4)
40356 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40357 else if (out_n == 8 && in_n == 8)
40358 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
40362 case BUILT_IN_IRINT:
40363 case BUILT_IN_LRINT:
40364 case BUILT_IN_LLRINT:
40365 if (out_mode == SImode && in_mode == DFmode)
40367 if (out_n == 4 && in_n == 2)
40368 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40369 else if (out_n == 8 && in_n == 4)
40370 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40374 case BUILT_IN_IRINTF:
40375 case BUILT_IN_LRINTF:
40376 case BUILT_IN_LLRINTF:
40377 if (out_mode == SImode && in_mode == SFmode)
40379 if (out_n == 4 && in_n == 4)
40380 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40381 else if (out_n == 8 && in_n == 8)
40382 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40386 case BUILT_IN_IROUND:
40387 case BUILT_IN_LROUND:
40388 case BUILT_IN_LLROUND:
40389 /* The round insn does not trap on denormals. */
40390 if (flag_trapping_math || !TARGET_ROUND)
40393 if (out_mode == SImode && in_mode == DFmode)
40395 if (out_n == 4 && in_n == 2)
40396 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40397 else if (out_n == 8 && in_n == 4)
40398 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40399 else if (out_n == 16 && in_n == 8)
40400 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40404 case BUILT_IN_IROUNDF:
40405 case BUILT_IN_LROUNDF:
40406 case BUILT_IN_LLROUNDF:
40407 /* The round insn does not trap on denormals. */
40408 if (flag_trapping_math || !TARGET_ROUND)
40411 if (out_mode == SImode && in_mode == SFmode)
40413 if (out_n == 4 && in_n == 4)
40414 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40415 else if (out_n == 8 && in_n == 8)
40416 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40420 case BUILT_IN_COPYSIGN:
40421 if (out_mode == DFmode && in_mode == DFmode)
40423 if (out_n == 2 && in_n == 2)
40424 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40425 else if (out_n == 4 && in_n == 4)
40426 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40427 else if (out_n == 8 && in_n == 8)
40428 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40432 case BUILT_IN_COPYSIGNF:
40433 if (out_mode == SFmode && in_mode == SFmode)
40435 if (out_n == 4 && in_n == 4)
40436 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40437 else if (out_n == 8 && in_n == 8)
40438 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40439 else if (out_n == 16 && in_n == 16)
40440 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40444 case BUILT_IN_FLOOR:
40445 /* The round insn does not trap on denormals. */
40446 if (flag_trapping_math || !TARGET_ROUND)
40449 if (out_mode == DFmode && in_mode == DFmode)
40451 if (out_n == 2 && in_n == 2)
40452 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40453 else if (out_n == 4 && in_n == 4)
40454 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40458 case BUILT_IN_FLOORF:
40459 /* The round insn does not trap on denormals. */
40460 if (flag_trapping_math || !TARGET_ROUND)
40463 if (out_mode == SFmode && in_mode == SFmode)
40465 if (out_n == 4 && in_n == 4)
40466 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40467 else if (out_n == 8 && in_n == 8)
40468 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40472 case BUILT_IN_CEIL:
40473 /* The round insn does not trap on denormals. */
40474 if (flag_trapping_math || !TARGET_ROUND)
40477 if (out_mode == DFmode && in_mode == DFmode)
40479 if (out_n == 2 && in_n == 2)
40480 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40481 else if (out_n == 4 && in_n == 4)
40482 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40486 case BUILT_IN_CEILF:
40487 /* The round insn does not trap on denormals. */
40488 if (flag_trapping_math || !TARGET_ROUND)
40491 if (out_mode == SFmode && in_mode == SFmode)
40493 if (out_n == 4 && in_n == 4)
40494 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40495 else if (out_n == 8 && in_n == 8)
40496 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40500 case BUILT_IN_TRUNC:
40501 /* The round insn does not trap on denormals. */
40502 if (flag_trapping_math || !TARGET_ROUND)
40505 if (out_mode == DFmode && in_mode == DFmode)
40507 if (out_n == 2 && in_n == 2)
40508 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40509 else if (out_n == 4 && in_n == 4)
40510 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40514 case BUILT_IN_TRUNCF:
40515 /* The round insn does not trap on denormals. */
40516 if (flag_trapping_math || !TARGET_ROUND)
40519 if (out_mode == SFmode && in_mode == SFmode)
40521 if (out_n == 4 && in_n == 4)
40522 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40523 else if (out_n == 8 && in_n == 8)
40524 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40528 case BUILT_IN_RINT:
40529 /* The round insn does not trap on denormals. */
40530 if (flag_trapping_math || !TARGET_ROUND)
40533 if (out_mode == DFmode && in_mode == DFmode)
40535 if (out_n == 2 && in_n == 2)
40536 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40537 else if (out_n == 4 && in_n == 4)
40538 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40542 case BUILT_IN_RINTF:
40543 /* The round insn does not trap on denormals. */
40544 if (flag_trapping_math || !TARGET_ROUND)
40547 if (out_mode == SFmode && in_mode == SFmode)
40549 if (out_n == 4 && in_n == 4)
40550 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40551 else if (out_n == 8 && in_n == 8)
40552 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40556 case BUILT_IN_ROUND:
40557 /* The round insn does not trap on denormals. */
40558 if (flag_trapping_math || !TARGET_ROUND)
40561 if (out_mode == DFmode && in_mode == DFmode)
40563 if (out_n == 2 && in_n == 2)
40564 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40565 else if (out_n == 4 && in_n == 4)
40566 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40570 case BUILT_IN_ROUNDF:
40571 /* The round insn does not trap on denormals. */
40572 if (flag_trapping_math || !TARGET_ROUND)
40575 if (out_mode == SFmode && in_mode == SFmode)
40577 if (out_n == 4 && in_n == 4)
40578 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40579 else if (out_n == 8 && in_n == 8)
40580 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
40585 if (out_mode == DFmode && in_mode == DFmode)
40587 if (out_n == 2 && in_n == 2)
40588 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40589 if (out_n == 4 && in_n == 4)
40590 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40594 case BUILT_IN_FMAF:
40595 if (out_mode == SFmode && in_mode == SFmode)
40597 if (out_n == 4 && in_n == 4)
40598 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40599 if (out_n == 8 && in_n == 8)
40600 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
40608 /* Dispatch to a handler for a vectorization library. */
40609 if (ix86_veclib_handler)
40610 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
40616 /* Handler for an SVML-style interface to
40617 a library with vectorized intrinsics. */
40620 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
40623 tree fntype, new_fndecl, args;
40626 machine_mode el_mode, in_mode;
40629 /* The SVML is suitable for unsafe math only. */
40630 if (!flag_unsafe_math_optimizations)
40633 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40634 n = TYPE_VECTOR_SUBPARTS (type_out);
40635 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40636 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40637 if (el_mode != in_mode
40645 case BUILT_IN_LOG10:
40647 case BUILT_IN_TANH:
40649 case BUILT_IN_ATAN:
40650 case BUILT_IN_ATAN2:
40651 case BUILT_IN_ATANH:
40652 case BUILT_IN_CBRT:
40653 case BUILT_IN_SINH:
40655 case BUILT_IN_ASINH:
40656 case BUILT_IN_ASIN:
40657 case BUILT_IN_COSH:
40659 case BUILT_IN_ACOSH:
40660 case BUILT_IN_ACOS:
40661 if (el_mode != DFmode || n != 2)
40665 case BUILT_IN_EXPF:
40666 case BUILT_IN_LOGF:
40667 case BUILT_IN_LOG10F:
40668 case BUILT_IN_POWF:
40669 case BUILT_IN_TANHF:
40670 case BUILT_IN_TANF:
40671 case BUILT_IN_ATANF:
40672 case BUILT_IN_ATAN2F:
40673 case BUILT_IN_ATANHF:
40674 case BUILT_IN_CBRTF:
40675 case BUILT_IN_SINHF:
40676 case BUILT_IN_SINF:
40677 case BUILT_IN_ASINHF:
40678 case BUILT_IN_ASINF:
40679 case BUILT_IN_COSHF:
40680 case BUILT_IN_COSF:
40681 case BUILT_IN_ACOSHF:
40682 case BUILT_IN_ACOSF:
40683 if (el_mode != SFmode || n != 4)
40691 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40693 if (fn == BUILT_IN_LOGF)
40694 strcpy (name, "vmlsLn4");
40695 else if (fn == BUILT_IN_LOG)
40696 strcpy (name, "vmldLn2");
40699 sprintf (name, "vmls%s", bname+10);
40700 name[strlen (name)-1] = '4';
40703 sprintf (name, "vmld%s2", bname+10);
40705 /* Convert to uppercase. */
40709 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40711 args = TREE_CHAIN (args))
40715 fntype = build_function_type_list (type_out, type_in, NULL);
40717 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40719 /* Build a function declaration for the vectorized function. */
40720 new_fndecl = build_decl (BUILTINS_LOCATION,
40721 FUNCTION_DECL, get_identifier (name), fntype);
40722 TREE_PUBLIC (new_fndecl) = 1;
40723 DECL_EXTERNAL (new_fndecl) = 1;
40724 DECL_IS_NOVOPS (new_fndecl) = 1;
40725 TREE_READONLY (new_fndecl) = 1;
40730 /* Handler for an ACML-style interface to
40731 a library with vectorized intrinsics. */
40734 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
40736 char name[20] = "__vr.._";
40737 tree fntype, new_fndecl, args;
40740 machine_mode el_mode, in_mode;
40743 /* The ACML is 64bits only and suitable for unsafe math only as
40744 it does not correctly support parts of IEEE with the required
40745 precision such as denormals. */
40747 || !flag_unsafe_math_optimizations)
40750 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40751 n = TYPE_VECTOR_SUBPARTS (type_out);
40752 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40753 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40754 if (el_mode != in_mode
40764 case BUILT_IN_LOG2:
40765 case BUILT_IN_LOG10:
40768 if (el_mode != DFmode
40773 case BUILT_IN_SINF:
40774 case BUILT_IN_COSF:
40775 case BUILT_IN_EXPF:
40776 case BUILT_IN_POWF:
40777 case BUILT_IN_LOGF:
40778 case BUILT_IN_LOG2F:
40779 case BUILT_IN_LOG10F:
40782 if (el_mode != SFmode
40791 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40792 sprintf (name + 7, "%s", bname+10);
40795 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40797 args = TREE_CHAIN (args))
40801 fntype = build_function_type_list (type_out, type_in, NULL);
40803 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40805 /* Build a function declaration for the vectorized function. */
40806 new_fndecl = build_decl (BUILTINS_LOCATION,
40807 FUNCTION_DECL, get_identifier (name), fntype);
40808 TREE_PUBLIC (new_fndecl) = 1;
40809 DECL_EXTERNAL (new_fndecl) = 1;
40810 DECL_IS_NOVOPS (new_fndecl) = 1;
40811 TREE_READONLY (new_fndecl) = 1;
40816 /* Returns a decl of a function that implements gather load with
40817 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
40818 Return NULL_TREE if it is not available. */
40821 ix86_vectorize_builtin_gather (const_tree mem_vectype,
40822 const_tree index_type, int scale)
40825 enum ix86_builtins code;
40830 if ((TREE_CODE (index_type) != INTEGER_TYPE
40831 && !POINTER_TYPE_P (index_type))
40832 || (TYPE_MODE (index_type) != SImode
40833 && TYPE_MODE (index_type) != DImode))
40836 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
40839 /* v*gather* insn sign extends index to pointer mode. */
40840 if (TYPE_PRECISION (index_type) < POINTER_SIZE
40841 && TYPE_UNSIGNED (index_type))
40846 || (scale & (scale - 1)) != 0)
40849 si = TYPE_MODE (index_type) == SImode;
40850 switch (TYPE_MODE (mem_vectype))
40853 if (TARGET_AVX512VL)
40854 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
40856 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
40859 if (TARGET_AVX512VL)
40860 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
40862 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
40865 if (TARGET_AVX512VL)
40866 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
40868 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
40871 if (TARGET_AVX512VL)
40872 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
40874 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
40877 if (TARGET_AVX512VL)
40878 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
40880 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
40883 if (TARGET_AVX512VL)
40884 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
40886 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
40889 if (TARGET_AVX512VL)
40890 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
40892 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
40895 if (TARGET_AVX512VL)
40896 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
40898 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
40901 if (TARGET_AVX512F)
40902 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
40907 if (TARGET_AVX512F)
40908 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
40913 if (TARGET_AVX512F)
40914 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
40919 if (TARGET_AVX512F)
40920 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
40928 return ix86_get_builtin (code);
40931 /* Returns a code for a target-specific builtin that implements
40932 reciprocal of the function, or NULL_TREE if not available. */
40935 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
40937 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
40938 && flag_finite_math_only && !flag_trapping_math
40939 && flag_unsafe_math_optimizations))
40943 /* Machine dependent builtins. */
40946 /* Vectorized version of sqrt to rsqrt conversion. */
40947 case IX86_BUILTIN_SQRTPS_NR:
40948 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
40950 case IX86_BUILTIN_SQRTPS_NR256:
40951 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
40957 /* Normal builtins. */
40960 /* Sqrt to rsqrt conversion. */
40961 case BUILT_IN_SQRTF:
40962 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
40969 /* Helper for avx_vpermilps256_operand et al. This is also used by
40970 the expansion functions to turn the parallel back into a mask.
40971 The return value is 0 for no match and the imm8+1 for a match. */
40974 avx_vpermilp_parallel (rtx par, machine_mode mode)
40976 unsigned i, nelt = GET_MODE_NUNITS (mode);
40978 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
40980 if (XVECLEN (par, 0) != (int) nelt)
40983 /* Validate that all of the elements are constants, and not totally
40984 out of range. Copy the data into an integral array to make the
40985 subsequent checks easier. */
40986 for (i = 0; i < nelt; ++i)
40988 rtx er = XVECEXP (par, 0, i);
40989 unsigned HOST_WIDE_INT ei;
40991 if (!CONST_INT_P (er))
41002 /* In the 512-bit DFmode case, we can only move elements within
41003 a 128-bit lane. First fill the second part of the mask,
41005 for (i = 4; i < 6; ++i)
41007 if (ipar[i] < 4 || ipar[i] >= 6)
41009 mask |= (ipar[i] - 4) << i;
41011 for (i = 6; i < 8; ++i)
41015 mask |= (ipar[i] - 6) << i;
41020 /* In the 256-bit DFmode case, we can only move elements within
41022 for (i = 0; i < 2; ++i)
41026 mask |= ipar[i] << i;
41028 for (i = 2; i < 4; ++i)
41032 mask |= (ipar[i] - 2) << i;
41037 /* In 512 bit SFmode case, permutation in the upper 256 bits
41038 must mirror the permutation in the lower 256-bits. */
41039 for (i = 0; i < 8; ++i)
41040 if (ipar[i] + 8 != ipar[i + 8])
41045 /* In 256 bit SFmode case, we have full freedom of
41046 movement within the low 128-bit lane, but the high 128-bit
41047 lane must mirror the exact same pattern. */
41048 for (i = 0; i < 4; ++i)
41049 if (ipar[i] + 4 != ipar[i + 4])
41056 /* In the 128-bit case, we've full freedom in the placement of
41057 the elements from the source operand. */
41058 for (i = 0; i < nelt; ++i)
41059 mask |= ipar[i] << (i * (nelt / 2));
41063 gcc_unreachable ();
41066 /* Make sure success has a non-zero value by adding one. */
41070 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
41071 the expansion functions to turn the parallel back into a mask.
41072 The return value is 0 for no match and the imm8+1 for a match. */
41075 avx_vperm2f128_parallel (rtx par, machine_mode mode)
41077 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
41079 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
41081 if (XVECLEN (par, 0) != (int) nelt)
41084 /* Validate that all of the elements are constants, and not totally
41085 out of range. Copy the data into an integral array to make the
41086 subsequent checks easier. */
41087 for (i = 0; i < nelt; ++i)
41089 rtx er = XVECEXP (par, 0, i);
41090 unsigned HOST_WIDE_INT ei;
41092 if (!CONST_INT_P (er))
41095 if (ei >= 2 * nelt)
41100 /* Validate that the halves of the permute are halves. */
41101 for (i = 0; i < nelt2 - 1; ++i)
41102 if (ipar[i] + 1 != ipar[i + 1])
41104 for (i = nelt2; i < nelt - 1; ++i)
41105 if (ipar[i] + 1 != ipar[i + 1])
41108 /* Reconstruct the mask. */
41109 for (i = 0; i < 2; ++i)
41111 unsigned e = ipar[i * nelt2];
41115 mask |= e << (i * 4);
41118 /* Make sure success has a non-zero value by adding one. */
41122 /* Return a register priority for hard reg REGNO. */
41124 ix86_register_priority (int hard_regno)
41126 /* ebp and r13 as the base always wants a displacement, r12 as the
41127 base always wants an index. So discourage their usage in an
41129 if (hard_regno == R12_REG || hard_regno == R13_REG)
41131 if (hard_regno == BP_REG)
41133 /* New x86-64 int registers result in bigger code size. Discourage
41135 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
41137 /* New x86-64 SSE registers result in bigger code size. Discourage
41139 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
41141 /* Usage of AX register results in smaller code. Prefer it. */
41142 if (hard_regno == 0)
41147 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
41149 Put float CONST_DOUBLE in the constant pool instead of fp regs.
41150 QImode must go into class Q_REGS.
41151 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
41152 movdf to do mem-to-mem moves through integer regs. */
41155 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
41157 machine_mode mode = GET_MODE (x);
41159 /* We're only allowed to return a subclass of CLASS. Many of the
41160 following checks fail for NO_REGS, so eliminate that early. */
41161 if (regclass == NO_REGS)
41164 /* All classes can load zeros. */
41165 if (x == CONST0_RTX (mode))
41168 /* Force constants into memory if we are loading a (nonzero) constant into
41169 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
41170 instructions to load from a constant. */
41172 && (MAYBE_MMX_CLASS_P (regclass)
41173 || MAYBE_SSE_CLASS_P (regclass)
41174 || MAYBE_MASK_CLASS_P (regclass)))
41177 /* Prefer SSE regs only, if we can use them for math. */
41178 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
41179 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
41181 /* Floating-point constants need more complex checks. */
41182 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
41184 /* General regs can load everything. */
41185 if (reg_class_subset_p (regclass, GENERAL_REGS))
41188 /* Floats can load 0 and 1 plus some others. Note that we eliminated
41189 zero above. We only want to wind up preferring 80387 registers if
41190 we plan on doing computation with them. */
41192 && standard_80387_constant_p (x) > 0)
41194 /* Limit class to non-sse. */
41195 if (regclass == FLOAT_SSE_REGS)
41197 if (regclass == FP_TOP_SSE_REGS)
41199 if (regclass == FP_SECOND_SSE_REGS)
41200 return FP_SECOND_REG;
41201 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
41208 /* Generally when we see PLUS here, it's the function invariant
41209 (plus soft-fp const_int). Which can only be computed into general
41211 if (GET_CODE (x) == PLUS)
41212 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
41214 /* QImode constants are easy to load, but non-constant QImode data
41215 must go into Q_REGS. */
41216 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
41218 if (reg_class_subset_p (regclass, Q_REGS))
41220 if (reg_class_subset_p (Q_REGS, regclass))
41228 /* Discourage putting floating-point values in SSE registers unless
41229    SSE math is being used, and likewise for the 387 registers.  */
/* Implement TARGET_PREFERRED_OUTPUT_RELOAD_CLASS: given output operand X and
   a candidate class REGCLASS, return the (sub)class to actually use, or
   NO_REGS to reject the alternative.
   NOTE(review): this chunk is a partial extraction — original line numbers
   are non-contiguous, so interior lines (return type, braces) are elided.
   Code below is kept byte-identical.  */
41231 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
41233   machine_mode mode = GET_MODE (x);
41235   /* Restrict the output reload class to the register bank that we are doing
41236      math on.  If we would like not to return a subset of CLASS, reject this
41237      alternative: if reload cannot do this, it will still use its choice.  */
/* NOTE(review): this assignment merely repeats MODE's initializer above —
   redundant but harmless.  */
41238   mode = GET_MODE (x);
41239   if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
41240     return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
/* For x87 FP modes, map mixed x87/SSE classes down to a pure x87 choice.  */
41242   if (X87_FLOAT_MODE_P (mode))
41244       if (regclass == FP_TOP_SSE_REGS)
41246       else if (regclass == FP_SECOND_SSE_REGS)
41247 	return FP_SECOND_REG;
41249       return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* Implement TARGET_SECONDARY_RELOAD.  Decide whether moving X (direction
   IN_P) in MODE to/from class RCLASS needs an intermediate register or a
   special reload pattern; SRI communicates the chosen insn code / extra cost
   back to reload.
   NOTE(review): partial extraction — interior lines are elided; code kept
   byte-identical.  */
41256 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41257 		       machine_mode mode, secondary_reload_info *sri)
41259   /* Double-word spills from general registers to non-offsettable memory
41260      references (zero-extended addresses) require special handling.  */
41263       && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41264       && INTEGER_CLASS_P (rclass)
41265       && !offsettable_memref_p (x))
41268 	      ? CODE_FOR_reload_noff_load
41269 	      : CODE_FOR_reload_noff_store);
41270       /* Add the cost of moving address to a temporary.  */
41271       sri->extra_cost = 1;
41276   /* QImode spills from non-QI registers require
41277      intermediate register on 32bit targets.  */
41279       && (MAYBE_MASK_CLASS_P (rclass)
41280 	  || (!TARGET_64BIT && !in_p
41281 	      && INTEGER_CLASS_P (rclass)
41282 	      && MAYBE_NON_Q_CLASS_P (rclass))))
41291       if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
41292 	regno = true_regnum (x);
41294       /* Return Q_REGS if the operand is in memory.  */
41299   /* This condition handles corner case where an expression involving
41300      pointers gets vectorized.  We're trying to use the address of a
41301      stack slot as a vector initializer.
41303        (set (reg:V2DI 74 [ vect_cst_.2 ])
41304             (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41306      Eventually frame gets turned into sp+offset like this:
41308        (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41309             (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41310 	                               (const_int 392 [0x188]))))
41312      That later gets turned into:
41314        (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41315             (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41316 	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41318      We'll have the following reload recorded:
41320        Reload 0: reload_in (DI) =
41321            (plus:DI (reg/f:DI 7 sp)
41322             (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41323        reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41324        SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41325        reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41326        reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41327        reload_reg_rtx: (reg:V2DI 22 xmm1)
41329      Which isn't going to work since SSE instructions can't handle scalar
41330      additions.  Returning GENERAL_REGS forces the addition into integer
41331      register and reload can handle subsequent reloads without problems.  */
41333   if (in_p && GET_CODE (x) == PLUS
41334       && SSE_CLASS_P (rclass)
41335       && SCALAR_INT_MODE_P (mode))
41336     return GENERAL_REGS;
41341 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.  */
/* Returns whether pseudos in RCLASS are likely to be spilled; the visible
   cases are single-register classes.
   NOTE(review): body heavily elided in this extraction; code kept
   byte-identical.  */
41344 ix86_class_likely_spilled_p (reg_class_t rclass)
41355     case SSE_FIRST_REG:
41357     case FP_SECOND_REG:
41368 /* If we are copying between general and FP registers, we need a memory
41369    location. The same is true for SSE and MMX registers.
41371    To optimize register_move_cost performance, allow inline variant.
41373    The macro can't work reliably when one of the CLASSES is class containing
41374    registers from multiple units (SSE, MMX, integer).  We avoid this by never
41375    combining those units in single alternative in the machine description.
41376    Ensure that this constraint holds to avoid unexpected surprises.
41378    When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41379    enforce these sanity checks.
   Returns nonzero when a CLASS1<->CLASS2 copy in MODE must go through memory.
   NOTE(review): interior lines (return statements, braces) are elided in this
   extraction; code kept byte-identical.  */
41382 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41383 				machine_mode mode, int strict)
41385   if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
/* Sanity check: mixed-unit classes must not reach here (see comment above);
   only tolerated while LRA is in progress or when not STRICT.  */
41387   if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41388       || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41389       || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41390       || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41391       || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41392       || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41394       gcc_assert (!strict || lra_in_progress);
41398   if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41401   /* Between mask and general, we have moves no larger than word size.  */
41402   if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41403       && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
41406   /* ??? This is a lie.  We do have moves between mmx/general, and for
41407      mmx/sse2.  But by saying we need secondary memory we discourage the
41408      register allocator from using the mmx registers unless needed.  */
41409   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41412   if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41414       /* SSE1 doesn't have any direct moves from other classes.  */
41418       /* If the target says that inter-unit moves are more expensive
41419 	 than moving through memory, then don't generate them.  */
41420       if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41421 	  || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41424       /* Between SSE and general, we have moves no larger than word size.  */
41425       if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper around inline_secondary_memory_needed; this is the
   SECONDARY_MEMORY_NEEDED entry point.  */
41433 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41434 			      machine_mode mode, int strict)
41436   return inline_secondary_memory_needed (class1, class2, mode, strict);
41439 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41441    On the 80386, this is the size of MODE in words,
41442    except in the FP regs, where a single reg is always enough.
   NOTE(review): partial extraction — non-integer-class and complex-mode
   branches are elided; code kept byte-identical.  */
41444 static unsigned char
41445 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41447   if (MAYBE_INTEGER_CLASS_P (rclass))
/* XFmode/XCmode occupy 2/4 registers on 64-bit and 3/6 on 32-bit.  */
41449       if (mode == XFmode)
41450 	return (TARGET_64BIT ? 2 : 3);
41451       else if (mode == XCmode)
41452 	return (TARGET_64BIT ? 4 : 6);
41454 	return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41458       if (COMPLEX_MODE_P (mode))
41465 /* Return true if the registers in CLASS cannot represent the change from
41466    modes FROM to TO.
   NOTE(review): partial extraction — interior return statements are elided;
   code kept byte-identical.  */
41469 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41470 			       enum reg_class regclass)
41475   /* x87 registers can't do subreg at all, as all values are reformatted
41476      to extended precision.  */
41477   if (MAYBE_FLOAT_CLASS_P (regclass))
41480   if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41482       /* Vector registers do not support QI or HImode loads.  If we don't
41483 	 disallow a change to these modes, reload will assume it's ok to
41484 	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
41485 	 the vec_dupv4hi pattern.  */
41486       if (GET_MODE_SIZE (from) < 4)
41493 /* Return the cost of moving data of mode M between a
41494    register and memory.  A value of 2 is the default; this cost is
41495    relative to those in `REGISTER_MOVE_COST'.
41497    This function is used extensively by register_move_cost that is used to
41498    build tables at startup.  Make it inline in this case.
41499    When IN is 2, return maximum of in and out move cost.
41501    If moving between registers and memory is more expensive than
41502    between two registers, you should define this macro to express the
41505    Model also increased moving costs of QImode registers in non
   NOTE(review): partial extraction — the `index' computation and several
   case labels are elided; code kept byte-identical.  */
41509 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
/* x87 load/store costs, indexed by mode size.  */
41513   if (FLOAT_CLASS_P (regclass))
41531 	return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41532       return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE load/store costs.  */
41534   if (SSE_CLASS_P (regclass))
41537       switch (GET_MODE_SIZE (mode))
41552 	return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41553       return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX load/store costs.  */
41555   if (MMX_CLASS_P (regclass))
41558       switch (GET_MODE_SIZE (mode))
41570 	return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41571       return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer registers: QImode is special-cased for partial-register stalls.  */
41573   switch (GET_MODE_SIZE (mode))
41576       if (Q_CLASS_P (regclass) || TARGET_64BIT)
41579 	    return ix86_cost->int_store[0];
41580 	  if (TARGET_PARTIAL_REG_DEPENDENCY
41581 	      && optimize_function_for_speed_p (cfun))
41582 	    cost = ix86_cost->movzbl_load;
41584 	    cost = ix86_cost->int_load[0];
41586 	    return MAX (cost, ix86_cost->int_store[0]);
41592 	    return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41594 	    return ix86_cost->movzbl_load;
41596 	    return ix86_cost->int_store[0] + 4;
41601 	return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41602       return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41604       /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
41605       if (mode == TFmode)
41608 	cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
41610 	cost = ix86_cost->int_load[2];
41612 	cost = ix86_cost->int_store[2];
41613       return (cost * (((int) GET_MODE_SIZE (mode)
41614 		       + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Implement TARGET_MEMORY_MOVE_COST: thin wrapper normalizing IN to 0/1
   before delegating to inline_memory_move_cost.  */
41619 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
41622   return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
41626 /* Return the cost of moving data from a register in class CLASS1 to
41627    one in class CLASS2.
41629    It is not required that the cost always equal 2 when FROM is the same as TO;
41630    on some machines it is expensive to move between registers if they are not
41631    general registers.
   NOTE(review): partial extraction — some interior lines are elided; code
   kept byte-identical.  */
41634 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
41635 			 reg_class_t class2_i)
41637   enum reg_class class1 = (enum reg_class) class1_i;
41638   enum reg_class class2 = (enum reg_class) class2_i;
41640   /* In case we require secondary memory, compute cost of the store followed
41641      by load.  In order to avoid bad register allocation choices, we need
41642      for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
41644   if (inline_secondary_memory_needed (class1, class2, mode, 0))
41648       cost += inline_memory_move_cost (mode, class1, 2);
41649       cost += inline_memory_move_cost (mode, class2, 2);
41651       /* In case of copying from general_purpose_register we may emit multiple
41652          stores followed by single load causing memory size mismatch stall.
41653          Count this as arbitrarily high cost of 20.  */
41654       if (targetm.class_max_nregs (class1, mode)
41655 	  > targetm.class_max_nregs (class2, mode))
41658   /* In the case of FP/MMX moves, the registers actually overlap, and we
41659      have to switch modes in order to treat them differently.  */
41660   if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
41661       || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
41667   /* Moves between SSE/MMX and integer unit are expensive.  */
41668   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
41669       || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41671     /* ??? By keeping returned value relatively high, we limit the number
41672        of moves between integer and MMX/SSE registers for all targets.
41673        Additionally, high value prevents problem with x86_modes_tieable_p(),
41674        where integer modes in MMX/SSE registers are not tieable
41675        because of missing QImode and HImode moves to, from or between
41676        MMX/SSE registers.  */
41677     return MAX (8, ix86_cost->mmxsse_to_integer);
41679   if (MAYBE_FLOAT_CLASS_P (class1))
41680     return ix86_cost->fp_move;
41681   if (MAYBE_SSE_CLASS_P (class1))
41682     return ix86_cost->sse_move;
41683   if (MAYBE_MMX_CLASS_P (class1))
41684     return ix86_cost->mmx_move;
41688 /* Return TRUE if hard register REGNO can hold a value of machine-mode
   MODE (HARD_REGNO_MODE_OK).  Dispatches on the register bank REGNO
   belongs to: flags, x87 stack, mask, bound, SSE, MMX, then general regs.
   NOTE(review): partial extraction — several interior lines are elided;
   code kept byte-identical.  */
41692 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
41694   /* Flags and only flags can only hold CCmode values.  */
41695   if (CC_REGNO_P (regno))
41696     return GET_MODE_CLASS (mode) == MODE_CC;
41697   if (GET_MODE_CLASS (mode) == MODE_CC
41698       || GET_MODE_CLASS (mode) == MODE_RANDOM
41699       || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
41701   if (STACK_REGNO_P (regno))
41702     return VALID_FP_MODE_P (mode);
41703   if (MASK_REGNO_P (regno))
41704     return (VALID_MASK_REG_MODE (mode)
41705 	    || (TARGET_AVX512BW
41706 		&& VALID_MASK_AVX512BW_MODE (mode)));
41707   if (BND_REGNO_P (regno))
41708     return VALID_BND_REG_MODE (mode);
41709   if (SSE_REGNO_P (regno))
41711       /* We implement the move patterns for all vector modes into and
41712 	 out of SSE registers, even when no operation instructions
41715       /* For AVX-512 we allow, regardless of regno:
41717 	  - any of 512-bit wide vector mode
41718 	  - any scalar mode.  */
41721 	      || VALID_AVX512F_REG_MODE (mode)
41722 	      || VALID_AVX512F_SCALAR_MODE (mode)))
41725       /* TODO check for QI/HI scalars.  */
41726       /* AVX512VL allows sse regs16+ for 128/256 bit modes.  */
41727       if (TARGET_AVX512VL
41730 	      || VALID_AVX256_REG_MODE (mode)
41731 	      || VALID_AVX512VL_128_REG_MODE (mode)))
41734       /* xmm16-xmm31 are only available for AVX-512.  */
41735       if (EXT_REX_SSE_REGNO_P (regno))
41738       /* OImode and AVX modes are available only when AVX is enabled.  */
41739       return ((TARGET_AVX
41740 	       && VALID_AVX256_REG_OR_OI_MODE (mode))
41741 	      || VALID_SSE_REG_MODE (mode)
41742 	      || VALID_SSE2_REG_MODE (mode)
41743 	      || VALID_MMX_REG_MODE (mode)
41744 	      || VALID_MMX_REG_MODE_3DNOW (mode));
41746   if (MMX_REGNO_P (regno))
41748       /* We implement the move patterns for 3DNOW modes even in MMX mode,
41749 	 so if the register is available at all, then we can move data of
41750 	 the given mode into or out of it.  */
41751       return (VALID_MMX_REG_MODE (mode)
41752 	      || VALID_MMX_REG_MODE_3DNOW (mode));
41755   if (mode == QImode)
41757       /* Take care for QImode values - they can be in non-QI regs,
41758 	 but then they do cause partial register stalls.  */
41759       if (ANY_QI_REGNO_P (regno))
41761       if (!TARGET_PARTIAL_REG_STALL)
41763       /* LRA checks if the hard register is OK for the given mode.
41764 	 QImode values can live in non-QI regs, so we allow all
41766       if (lra_in_progress)
41768       return !can_create_pseudo_p ();
41770   /* We handle both integer and floats in the general purpose registers.  */
41771   else if (VALID_INT_MODE_P (mode))
41773   else if (VALID_FP_MODE_P (mode))
41775   else if (VALID_DFP_MODE_P (mode))
41777   /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
41778      on to use that value in smaller contexts, this can easily force a
41779      pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
41780      supporting DImode, allow it.  */
41781   else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
41787 /* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
41788    tieable integer mode.
   NOTE(review): the switch dispatch lines are elided in this extraction;
   code kept byte-identical.  */
41791 ix86_tieable_integer_mode_p (machine_mode mode)
41800       return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
41803       return TARGET_64BIT;
41810 /* Return true if MODE1 is accessible in a register that can hold MODE2
41811    without copying.  That is, all register classes that can hold MODE2
41812    can also hold MODE1.  */
41815 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
41817   if (mode1 == mode2)
41820   if (ix86_tieable_integer_mode_p (mode1)
41821       && ix86_tieable_integer_mode_p (mode2))
41824   /* MODE2 being XFmode implies fp stack or general regs, which means we
41825      can tie any smaller floating point modes to it.  Note that we do not
41826      tie this with TFmode.  */
41827   if (mode2 == XFmode)
41828     return mode1 == SFmode || mode1 == DFmode;
41830   /* MODE2 being DFmode implies fp stack, general or sse regs, which means
41831      that we can tie it with SFmode.  */
41832   if (mode2 == DFmode)
41833     return mode1 == SFmode;
41835   /* If MODE2 is only appropriate for an SSE register, then tie with
41836      any other mode acceptable to SSE registers.  */
41837   if (GET_MODE_SIZE (mode2) == 32
41838       && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41839     return (GET_MODE_SIZE (mode1) == 32
41840 	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41841   if (GET_MODE_SIZE (mode2) == 16
41842       && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41843     return (GET_MODE_SIZE (mode1) == 16
41844 	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41846   /* If MODE2 is appropriate for an MMX register, then tie
41847      with any other mode acceptable to MMX registers.  */
41848   if (GET_MODE_SIZE (mode2) == 8
41849       && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
41850     return (GET_MODE_SIZE (mode1) == 8
41851 	    && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
41856 /* Return the cost of moving between two registers of mode MODE.
   Picks the largest natural chunk size UNITS the target can move MODE in,
   then charges one insn per chunk.
   NOTE(review): partial extraction — case labels for the scalar classes and
   `break's are elided; code kept byte-identical.  */
41859 ix86_set_reg_reg_cost (machine_mode mode)
41861   unsigned int units = UNITS_PER_WORD;
41863   switch (GET_MODE_CLASS (mode))
41869       units = GET_MODE_SIZE (CCmode);
41873       if ((TARGET_SSE && mode == TFmode)
41874 	  || (TARGET_80387 && mode == XFmode)
41875 	  || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
41876 	  || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
41877 	units = GET_MODE_SIZE (mode);
41880     case MODE_COMPLEX_FLOAT:
41881       if ((TARGET_SSE && mode == TCmode)
41882 	  || (TARGET_80387 && mode == XCmode)
41883 	  || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
41884 	  || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
41885 	units = GET_MODE_SIZE (mode);
41888     case MODE_VECTOR_INT:
41889     case MODE_VECTOR_FLOAT:
41890       if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41891 	  || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41892 	  || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41893 	  || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41894 	  || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
41895 	units = GET_MODE_SIZE (mode);
41898   /* Return the cost of moving between two registers of mode MODE,
41899      assuming that the move will be in pieces of at most UNITS bytes.  */
41900   return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
41903 /* Compute a (partial) cost for rtx X.  Return true if the complete
41904    cost has been computed, and false if subexpressions should be
41905    scanned.  In either case, *TOTAL contains the cost result.
   Implements TARGET_RTX_COSTS.  Costs come from the active processor cost
   table (size table when optimizing for size).
   NOTE(review): this extraction elides most of the switch's case labels,
   braces and `return true/false' lines; code kept byte-identical.  */
41908 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
41912   enum rtx_code code = (enum rtx_code) code_i;
41913   enum rtx_code outer_code = (enum rtx_code) outer_code_i;
41914   machine_mode mode = GET_MODE (x);
41915   const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
/* SET of a register from a register (or zero) is costed as a plain move.  */
41920       if (register_operand (SET_DEST (x), VOIDmode)
41921 	  && reg_or_0_operand (SET_SRC (x), VOIDmode))
41923 	  *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
/* Constant costs: immediates that don't fit, PIC symbols, FP constants.  */
41932       if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
41934       else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
41936       else if (flag_pic && SYMBOLIC_CONST (x)
41938 	       && (GET_CODE (x) == LABEL_REF
41939 		   || (GET_CODE (x) == SYMBOL_REF
41940 		       && SYMBOL_REF_LOCAL_P (x)))))
41947       if (mode == VOIDmode)
41952 	  switch (standard_80387_constant_p (x))
41957 	    default: /* Other constants */
41964       if (SSE_FLOAT_MODE_P (mode))
41967 	  switch (standard_sse_constant_p (x))
41971 	    case 1: /* 0: xor eliminates false dependency */
41974 	    default: /* -1: cmp contains false dependency */
41979       /* Fall back to (MEM (SYMBOL_REF)), since that's where
41980 	 it'll probably end up.  Add a penalty for size.  */
41981       *total = (COSTS_N_INSNS (1)
41982 		+ (flag_pic != 0 && !TARGET_64BIT)
41983 		+ (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
41987       /* The zero extensions is often completely free on x86_64, so make
41988 	 it as cheap as possible.  */
41989       if (TARGET_64BIT && mode == DImode
41990 	  && GET_MODE (XEXP (x, 0)) == SImode)
41992       else if (TARGET_ZERO_EXTEND_WITH_AND)
41993 	*total = cost->add;
41995 	*total = cost->movzx;
41999       *total = cost->movsx;
/* Shift costs: small left shifts may be done with LEA or ADD.  */
42003       if (SCALAR_INT_MODE_P (mode)
42004 	  && GET_MODE_SIZE (mode) < UNITS_PER_WORD
42005 	  && CONST_INT_P (XEXP (x, 1)))
42007 	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42010 	      *total = cost->add;
42013 	  if ((value == 2 || value == 3)
42014 	      && cost->lea <= cost->shift_const)
42016 	      *total = cost->lea;
42026       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42028 	  /* ??? Should be SSE vector operation cost.  */
42029 	  /* At least for published AMD latencies, this really is the same
42030 	     as the latency for a simple fpu operation like fabs.  */
42031 	  /* V*QImode is emulated with 1-11 insns.  */
42032 	  if (mode == V16QImode || mode == V32QImode)
42035 	      if (TARGET_XOP && mode == V16QImode)
42037 		  /* For XOP we use vpshab, which requires a broadcast of the
42038 		     value to the variable shift insn.  For constants this
42039 		     means a V16Q const in mem; even when we can perform the
42040 		     shift with one insn set the cost to prefer paddb.  */
42041 		  if (CONSTANT_P (XEXP (x, 1)))
42043 		      *total = (cost->fabs
42044 				+ rtx_cost (XEXP (x, 0), code, 0, speed)
42045 				+ (speed ? 2 : COSTS_N_BYTES (16)));
42050 	      else if (TARGET_SSSE3)
42052 	      *total = cost->fabs * count;
42055 	    *total = cost->fabs;
42057       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42059 	  if (CONST_INT_P (XEXP (x, 1)))
42061 	      if (INTVAL (XEXP (x, 1)) > 32)
42062 		*total = cost->shift_const + COSTS_N_INSNS (2);
42064 		*total = cost->shift_const * 2;
42068 	      if (GET_CODE (XEXP (x, 1)) == AND)
42069 		*total = cost->shift_var * 2;
42071 		*total = cost->shift_var * 6 + COSTS_N_INSNS (2);
42076 	  if (CONST_INT_P (XEXP (x, 1)))
42077 	    *total = cost->shift_const;
42078 	  else if (GET_CODE (XEXP (x, 1)) == SUBREG
42079 		   && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
42081 	      /* Return the cost after shift-and truncation.  */
42082 	      *total = cost->shift_var;
42086 	    *total = cost->shift_var;
/* FMA: costed as one fmul plus the operand costs; negations are free.  */
42094       gcc_assert (FLOAT_MODE_P (mode));
42095       gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
42097       /* ??? SSE scalar/vector cost should be used here.  */
42098       /* ??? Bald assumption that fma has the same cost as fmul.  */
42099       *total = cost->fmul;
42100       *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
42102       /* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
42104       if (GET_CODE (sub) == NEG)
42105 	sub = XEXP (sub, 0);
42106       *total += rtx_cost (sub, FMA, 0, speed);
42109       if (GET_CODE (sub) == NEG)
42110 	sub = XEXP (sub, 0);
42111       *total += rtx_cost (sub, FMA, 2, speed);
/* MULT: FP multiplies, emulated vector multiplies, and integer multiply
   with widening / bit-count refinements.  */
42116       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42118 	  /* ??? SSE scalar cost should be used here.  */
42119 	  *total = cost->fmul;
42122       else if (X87_FLOAT_MODE_P (mode))
42124 	  *total = cost->fmul;
42127       else if (FLOAT_MODE_P (mode))
42129 	  /* ??? SSE vector cost should be used here.  */
42130 	  *total = cost->fmul;
42133       else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42135 	  /* V*QImode is emulated with 7-13 insns.  */
42136 	  if (mode == V16QImode || mode == V32QImode)
42139 	      if (TARGET_XOP && mode == V16QImode)
42141 	      else if (TARGET_SSSE3)
42143 	      *total = cost->fmul * 2 + cost->fabs * extra;
42145 	  /* V*DImode is emulated with 5-8 insns.  */
42146 	  else if (mode == V2DImode || mode == V4DImode)
42148 	      if (TARGET_XOP && mode == V2DImode)
42149 		*total = cost->fmul * 2 + cost->fabs * 3;
42151 		*total = cost->fmul * 3 + cost->fabs * 5;
42153 	  /* Without sse4.1, we don't have PMULLD; it's emulated with 7
42154 	     insns, including two PMULUDQ.  */
42155 	  else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
42156 	    *total = cost->fmul * 2 + cost->fabs * 5;
42158 	    *total = cost->fmul;
42163 	  rtx op0 = XEXP (x, 0);
42164 	  rtx op1 = XEXP (x, 1);
42166 	  if (CONST_INT_P (XEXP (x, 1)))
42168 	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42169 	      for (nbits = 0; value != 0; value &= value - 1)
42173 	    /* This is arbitrary.  */
42176 	  /* Compute costs correctly for widening multiplication.  */
42177 	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
42178 	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
42179 	         == GET_MODE_SIZE (mode))
42181 	      int is_mulwiden = 0;
42182 	      machine_mode inner_mode = GET_MODE (op0);
42184 	      if (GET_CODE (op0) == GET_CODE (op1))
42185 		is_mulwiden = 1, op1 = XEXP (op1, 0);
42186 	      else if (CONST_INT_P (op1))
42188 		  if (GET_CODE (op0) == SIGN_EXTEND)
42189 		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
42192 		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
42196 		op0 = XEXP (op0, 0), mode = GET_MODE (op0);
42199 	  *total = (cost->mult_init[MODE_INDEX (mode)]
42200 		    + nbits * cost->mult_bit
42201 	            + rtx_cost (op0, outer_code, opno, speed)
42202 		    + rtx_cost (op1, outer_code, opno, speed));
/* Division and modulus.  */
42211       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42212 	/* ??? SSE cost should be used here.  */
42213 	*total = cost->fdiv;
42214       else if (X87_FLOAT_MODE_P (mode))
42215 	*total = cost->fdiv;
42216       else if (FLOAT_MODE_P (mode))
42217 	/* ??? SSE vector cost should be used here.  */
42218 	*total = cost->fdiv;
42220 	*total = cost->divide[MODE_INDEX (mode)];
/* PLUS: recognize LEA-shaped address arithmetic and cost it as one LEA.  */
42224       if (GET_MODE_CLASS (mode) == MODE_INT
42225 	  && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
42227 	  if (GET_CODE (XEXP (x, 0)) == PLUS
42228 	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
42229 	      && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
42230 	      && CONSTANT_P (XEXP (x, 1)))
42232 	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
42233 	      if (val == 2 || val == 4 || val == 8)
42235 		  *total = cost->lea;
42236 		  *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42237 				      outer_code, opno, speed);
42238 		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
42239 				      outer_code, opno, speed);
42240 		  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42244 	  else if (GET_CODE (XEXP (x, 0)) == MULT
42245 		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
42247 	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
42248 	      if (val == 2 || val == 4 || val == 8)
42250 		  *total = cost->lea;
42251 		  *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42252 				      outer_code, opno, speed);
42253 		  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42257 	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
42259 	      *total = cost->lea;
42260 	      *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42261 				  outer_code, opno, speed);
42262 	      *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42263 				  outer_code, opno, speed);
42264 	      *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42271       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42273 	  /* ??? SSE cost should be used here.  */
42274 	  *total = cost->fadd;
42277       else if (X87_FLOAT_MODE_P (mode))
42279 	  *total = cost->fadd;
42282       else if (FLOAT_MODE_P (mode))
42284 	  /* ??? SSE vector cost should be used here.  */
42285 	  *total = cost->fadd;
/* Double-word logic ops: two insns, with DImode halves costed once each.  */
42293       if (GET_MODE_CLASS (mode) == MODE_INT
42294 	  && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42296 	  *total = (cost->add * 2
42297 		    + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
42298 		       << (GET_MODE (XEXP (x, 0)) != DImode))
42299 		    + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
42300 		       << (GET_MODE (XEXP (x, 1)) != DImode)));
42306       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42308 	  /* ??? SSE cost should be used here.  */
42309 	  *total = cost->fchs;
42312       else if (X87_FLOAT_MODE_P (mode))
42314 	  *total = cost->fchs;
42317       else if (FLOAT_MODE_P (mode))
42319 	  /* ??? SSE vector cost should be used here.  */
42320 	  *total = cost->fchs;
42326       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42328 	  /* ??? Should be SSE vector operation cost.  */
42329 	  /* At least for published AMD latencies, this really is the same
42330 	     as the latency for a simple fpu operation like fabs.  */
42331 	  *total = cost->fabs;
42333       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42334 	*total = cost->add * 2;
42336 	*total = cost->add;
/* Single-bit compare implemented via test[bwl].  */
42340       if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42341 	  && XEXP (XEXP (x, 0), 1) == const1_rtx
42342 	  && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42343 	  && XEXP (x, 1) == const0_rtx)
42345 	  /* This kind of construct is implemented using test[bwl].
42346 	     Treat it as if we had an AND.  */
42347 	  *total = (cost->add
42348 		    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
42349 		    + rtx_cost (const1_rtx, outer_code, opno, speed));
42355       if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
42360       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42361 	/* ??? SSE cost should be used here.  */
42362 	*total = cost->fabs;
42363       else if (X87_FLOAT_MODE_P (mode))
42364 	*total = cost->fabs;
42365       else if (FLOAT_MODE_P (mode))
42366 	/* ??? SSE vector cost should be used here.  */
42367 	*total = cost->fabs;
42371       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42372 	/* ??? SSE cost should be used here.  */
42373 	*total = cost->fsqrt;
42374       else if (X87_FLOAT_MODE_P (mode))
42375 	*total = cost->fsqrt;
42376       else if (FLOAT_MODE_P (mode))
42377 	/* ??? SSE vector cost should be used here.  */
42378 	*total = cost->fsqrt;
42382       if (XINT (x, 1) == UNSPEC_TP)
42388     case VEC_DUPLICATE:
42389       /* ??? Assume all of these vector manipulation patterns are
42390 	 recognizable.  In which case they all pretty much have the
42392       *total = cost->fabs;
42395       mask = XEXP (x, 2);
42396       /* This is masked instruction, assume the same cost,
42397 	 as nonmasked variant.  */
42398       if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42399 	*total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
42401 	*total = cost->fabs;
/* Counter used to generate unique local labels for Mach-O stubs.  */
42411 static int current_machopic_label_num;
42413 /* Given a symbol name and its associated stub, write out the
42414    definition of the stub.
   Emits a Darwin (Mach-O) lazy-binding stub for SYMB into FILE: the stub
   section entry, the stub-binding-helper trampoline (unless the AT&T-style
   stub is used), and the lazy symbol pointer.  32-bit only.
   NOTE(review): partial extraction — some interior lines are elided; code
   kept byte-identical.  */
42417 machopic_output_stub (FILE *file, const char *symb, const char *stub)
42419   unsigned int length;
42420   char *binder_name, *symbol_name, lazy_ptr_name[32];
42421   int label = ++current_machopic_label_num;
42423   /* For 64-bit we shouldn't get here.  */
42424   gcc_assert (!TARGET_64BIT);
42426   /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
42427   symb = targetm.strip_name_encoding (symb);
42429   length = strlen (stub);
42430   binder_name = XALLOCAVEC (char, length + 32);
42431   GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42433   length = strlen (symb);
42434   symbol_name = XALLOCAVEC (char, length + 32);
42435   GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42437   sprintf (lazy_ptr_name, "L%d$lz", label);
42439   if (MACHOPIC_ATT_STUB)
42440     switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42441   else if (MACHOPIC_PURE)
42442     switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42444     switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42446   fprintf (file, "%s:\n", stub);
42447   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42449   if (MACHOPIC_ATT_STUB)
42451       fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42453   else if (MACHOPIC_PURE)
42456       /* 25-byte PIC stub using "CALL get_pc_thunk".  */
42457       rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42458       output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
42459       fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42460 	       label, lazy_ptr_name, label);
42461       fprintf (file, "\tjmp\t*%%ecx\n");
42464     fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42466   /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42467      it needs no stub-binding-helper.  */
42468   if (MACHOPIC_ATT_STUB)
42471   fprintf (file, "%s:\n", binder_name);
42475       fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42476       fprintf (file, "\tpushl\t%%ecx\n");
42479     fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42481   fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42483   /* N.B. Keep the correspondence of these
42484      'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42485      old-pic/new-pic/non-pic stubs; altering this will break
42486      compatibility with existing dylibs.  */
42489       /* 25-byte PIC stub using "CALL get_pc_thunk".  */
42490       switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42493     /* 16-byte -mdynamic-no-pic stub.  */
42494     switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
42496   fprintf (file, "%s:\n", lazy_ptr_name);
42497   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42498   fprintf (file, ASM_LONG "%s\n", binder_name);
42500 #endif /* TARGET_MACHO */
42502 /* Order the registers for register allocator.
   Fills reg_alloc_order[]: call-clobbered GPRs first, then call-saved GPRs,
   then x87 (before SSE when doing x87 math, after otherwise), SSE, extended
   REX SSE, mask, MPX bound and MMX registers.  */
42505 x86_order_regs_for_local_alloc (void)
42510    /* First allocate the local general purpose registers.  */
42511   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42512     if (GENERAL_REGNO_P (i) && call_used_regs[i])
42513       reg_alloc_order [pos++] = i;
42515    /* Global general purpose registers.  */
42516   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42517     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42518       reg_alloc_order [pos++] = i;
42520   /* x87 registers come first in case we are doing FP math
42522   if (!TARGET_SSE_MATH)
42523     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42524       reg_alloc_order [pos++] = i;
42526   /* SSE registers.  */
42527   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42528     reg_alloc_order [pos++] = i;
42529   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42530     reg_alloc_order [pos++] = i;
42532   /* Extended REX SSE registers.  */
42533   for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42534     reg_alloc_order [pos++] = i;
42536   /* Mask register.  */
42537   for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42538     reg_alloc_order [pos++] = i;
42540   /* MPX bound registers.  */
42541   for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42542     reg_alloc_order [pos++] = i;
42544   /* x87 registers.  */
42545   if (TARGET_SSE_MATH)
42546     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42547       reg_alloc_order [pos++] = i;
42549   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42550     reg_alloc_order [pos++] = i;
42552   /* Initialize the rest of array as we do not allocate some registers
42554   while (pos < FIRST_PSEUDO_REGISTER)
42555     reg_alloc_order [pos++] = 0;
42558 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42559 in struct attribute_spec handler. */
/* NOTE(review): sampled listing -- elided lines include the return type,
   braces, intermediate parameters, and the returned tree value.  The
   visible checks are: (1) the attribute applies only to function-ish
   nodes, (2) it is 32-bit only, (3) its single argument must be the
   integer constant 0 or 1.  Each failure path warns and sets
   *no_add_attrs so the attribute is dropped rather than attached.  */
42561 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42564 bool *no_add_attrs)
42566 if (TREE_CODE (*node) != FUNCTION_TYPE
42567 && TREE_CODE (*node) != METHOD_TYPE
42568 && TREE_CODE (*node) != FIELD_DECL
42569 && TREE_CODE (*node) != TYPE_DECL)
42571 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42573 *no_add_attrs = true;
/* The guarding condition for this warning (presumably TARGET_64BIT) is
   elided from this listing -- confirm against the full source.  */
42578 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42580 *no_add_attrs = true;
42583 if (is_attribute_p ("callee_pop_aggregate_return", name))
42587 cst = TREE_VALUE (args);
42588 if (TREE_CODE (cst) != INTEGER_CST)
42590 warning (OPT_Wattributes,
42591 "%qE attribute requires an integer constant argument",
42593 *no_add_attrs = true;
42595 else if (compare_tree_int (cst, 0) != 0
42596 && compare_tree_int (cst, 1) != 0)
42598 warning (OPT_Wattributes,
42599 "argument to %qE attribute is neither zero, nor one",
42601 *no_add_attrs = true;
42610 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
42611 struct attribute_spec.handler. */
/* NOTE(review): sampled listing; braces/returns elided.  Rejects the
   attribute on non-function nodes, then errors if ms_abi and sysv_abi
   are both applied to the same type -- the two calling-convention
   attributes are mutually exclusive.  */
42613 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
42614 bool *no_add_attrs)
42616 if (TREE_CODE (*node) != FUNCTION_TYPE
42617 && TREE_CODE (*node) != METHOD_TYPE
42618 && TREE_CODE (*node) != FIELD_DECL
42619 && TREE_CODE (*node) != TYPE_DECL)
42621 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42623 *no_add_attrs = true;
42627 /* Can combine regparm with all attributes but fastcall. */
42628 if (is_attribute_p ("ms_abi", name))
42630 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
42632 error ("ms_abi and sysv_abi attributes are not compatible");
42637 else if (is_attribute_p ("sysv_abi", name))
42639 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
42641 error ("ms_abi and sysv_abi attributes are not compatible");
42650 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
42651 struct attribute_spec.handler. */
/* NOTE(review): sampled listing; the 'type' declaration, the non-DECL
   branch, and braces are elided.  Visible logic: resolve the record
   type being annotated, ignore the attribute (with a warning) when the
   target is not a record/union, and ignore it when the opposite layout
   attribute is already present.  */
42653 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
42654 bool *no_add_attrs)
42657 if (DECL_P (*node))
42659 if (TREE_CODE (*node) == TYPE_DECL)
42660 type = &TREE_TYPE (*node);
42665 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
42667 warning (OPT_Wattributes, "%qE attribute ignored",
42669 *no_add_attrs = true;
/* ms_struct and gcc_struct are mutually exclusive on one type.  */
42672 else if ((is_attribute_p ("ms_struct", name)
42673 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
42674 || ((is_attribute_p ("gcc_struct", name)
42675 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
42677 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
42679 *no_add_attrs = true;
/* Attribute handler that accepts the attribute only on FUNCTION_DECLs;
   anything else gets a warning and *no_add_attrs set.  (Return type,
   braces, and return value are elided from this sampled listing.)  */
42686 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
42687 bool *no_add_attrs)
42689 if (TREE_CODE (*node) != FUNCTION_DECL)
42691 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42693 *no_add_attrs = true;
/* Return whether RECORD_TYPE should use MS bitfield layout: either the
   target defaults to MS layout and the type is not marked gcc_struct,
   or the type is explicitly marked ms_struct.  */
42699 ix86_ms_bitfield_layout_p (const_tree record_type)
42701 return ((TARGET_MS_BITFIELD_LAYOUT
42702 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
42703 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
42706 /* Returns an expression indicating where the this parameter is
42707 located on entry to the FUNCTION. */
/* NOTE(review): sampled listing -- the 64-bit/32-bit branch structure,
   'nregs'/'regno' declarations, and several else-arms are elided.
   Visible behavior: on 64-bit, 'this' is in the first (or second, when
   the return is an aggregate returned via hidden pointer) integer
   parameter register of the active ABI; on 32-bit with register
   parameters it may be in CX/DX (fastcall/thiscall), otherwise it is a
   stack slot at 4(%esp).  */
42710 x86_this_parameter (tree function)
42712 tree type = TREE_TYPE (function);
/* aggr != 0 means the return value is an aggregate passed via a hidden
   pointer, which shifts 'this' to the next parameter slot.  */
42713 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
42718 const int *parm_regs;
42720 if (ix86_function_type_abi (type) == MS_ABI)
42721 parm_regs = x86_64_ms_abi_int_parameter_registers;
42723 parm_regs = x86_64_int_parameter_registers;
42724 return gen_rtx_REG (Pmode, parm_regs[aggr]);
42727 nregs = ix86_function_regparm (type, function);
42729 if (nregs > 0 && !stdarg_p (type))
42732 unsigned int ccvt = ix86_get_callcvt (type);
42734 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42735 regno = aggr ? DX_REG : CX_REG;
42736 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42740 return gen_rtx_MEM (SImode,
42741 plus_constant (Pmode, stack_pointer_rtx, 4));
42750 return gen_rtx_MEM (SImode,
42751 plus_constant (Pmode,
42752 stack_pointer_rtx, 4));
42755 return gen_rtx_REG (SImode, regno);
/* Fallback: 'this' is on the stack just above the return address; the
   exact offset expression is partially elided here.  */
42758 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
42762 /* Determine whether x86_output_mi_thunk can succeed. */
/* NOTE(review): sampled listing; return statements and braces elided.
   The visible tests: 64-bit always works; 32-bit needs one free
   register for the vcall offset and one more for GOT references when
   PIC and the function does not bind locally.  */
42765 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
42766 const_tree function)
42768 /* 64-bit can handle anything. */
42772 /* For 32-bit, everything's fine if we have one free register. */
42773 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
42776 /* Need a free register for vcall_offset. */
42780 /* Need a free register for GOT references. */
42781 if (flag_pic && !targetm.binds_local_p (function))
42784 /* Otherwise ok. */
42788 /* Output the assembler code for a thunk function. THUNK_DECL is the
42789 declaration for the thunk function itself, FUNCTION is the decl for
42790 the target function. DELTA is an immediate constant offset to be
42791 added to THIS. If VCALL_OFFSET is nonzero, the word at
42792 *(*this + vcall_offset) should be added to THIS. */
/* NOTE(review): sampled listing -- braces, several condition lines, and
   some TARGET_64BIT guards are elided; code tokens below are kept
   byte-identical.  Overall shape: pick a scratch register that does not
   clash with the calling convention, adjust 'this' by DELTA and by the
   vtable-loaded VCALL_OFFSET, then emit a sibling call (or indirect
   jump) to FUNCTION, handling PIC/GOT, Mach-O, and large-model PIC
   addressing of the target.  */
42795 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
42796 HOST_WIDE_INT vcall_offset, tree function)
42798 rtx this_param = x86_this_parameter (function);
42799 rtx this_reg, tmp, fnaddr;
42800 unsigned int tmp_regno;
/* Scratch register choice: R10 on 64-bit; on 32-bit pick whichever of
   AX/DX/CX the calling convention leaves free for this function.  */
42804 tmp_regno = R10_REG;
42807 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
42808 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42809 tmp_regno = AX_REG;
42810 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42811 tmp_regno = DX_REG;
42813 tmp_regno = CX_REG;
42816 emit_note (NOTE_INSN_PROLOGUE_END);
42818 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
42819 pull it in now and let DELTA benefit. */
42820 if (REG_P (this_param))
42821 this_reg = this_param;
42822 else if (vcall_offset)
42824 /* Put the this parameter into %eax. */
42825 this_reg = gen_rtx_REG (Pmode, AX_REG);
42826 emit_move_insn (this_reg, this_param);
42829 this_reg = NULL_RTX;
42831 /* Adjust the this parameter by a fixed constant. */
42834 rtx delta_rtx = GEN_INT (delta);
42835 rtx delta_dst = this_reg ? this_reg : this_param;
/* DELTA too wide for an immediate on x86-64: materialize it in the
   scratch register first.  */
42839 if (!x86_64_general_operand (delta_rtx, Pmode))
42841 tmp = gen_rtx_REG (Pmode, tmp_regno);
42842 emit_move_insn (tmp, delta_rtx);
42847 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
42850 /* Adjust the this parameter by a value stored in the vtable. */
42853 rtx vcall_addr, vcall_mem, this_mem;
42855 tmp = gen_rtx_REG (Pmode, tmp_regno);
42857 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
42858 if (Pmode != ptr_mode)
42859 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
/* tmp = *this, i.e. the vtable pointer.  */
42860 emit_move_insn (tmp, this_mem);
42862 /* Adjust the this parameter. */
42863 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
42865 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
42867 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
42868 emit_move_insn (tmp2, GEN_INT (vcall_offset));
42869 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
42872 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
42873 if (Pmode != ptr_mode)
42874 emit_insn (gen_addsi_1_zext (this_reg,
42875 gen_rtx_REG (ptr_mode,
42879 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
42882 /* If necessary, drop THIS back to its stack slot. */
42883 if (this_reg && this_reg != this_param)
42884 emit_move_insn (this_param, this_reg);
42886 fnaddr = XEXP (DECL_RTL (function), 0);
/* Target address: direct when non-PIC or locally bound; otherwise go
   through the GOT (GOTPCREL on 64-bit, @GOT(%ebx-style) on 32-bit) or
   the Mach-O indirection.  */
42889 if (!flag_pic || targetm.binds_local_p (function)
42894 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
42895 tmp = gen_rtx_CONST (Pmode, tmp);
42896 fnaddr = gen_const_mem (Pmode, tmp);
42901 if (!flag_pic || targetm.binds_local_p (function))
42904 else if (TARGET_MACHO)
42906 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
42907 fnaddr = XEXP (fnaddr, 0);
42909 #endif /* TARGET_MACHO */
42912 tmp = gen_rtx_REG (Pmode, CX_REG);
42913 output_set_got (tmp, NULL_RTX);
42915 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
42916 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
42917 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
42918 fnaddr = gen_const_mem (Pmode, fnaddr);
42922 /* Our sibling call patterns do not allow memories, because we have no
42923 predicate that can distinguish between frame and non-frame memory.
42924 For our purposes here, we can get away with (ab)using a jump pattern,
42925 because we're going to do no optimization. */
42926 if (MEM_P (fnaddr))
42928 if (sibcall_insn_operand (fnaddr, word_mode))
42930 fnaddr = XEXP (DECL_RTL (function), 0);
42931 tmp = gen_rtx_MEM (QImode, fnaddr);
42932 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
42933 tmp = emit_call_insn (tmp);
42934 SIBLING_CALL_P (tmp) = 1;
42937 emit_jump_insn (gen_indirect_jump (fnaddr));
42941 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
42943 // CM_LARGE_PIC always uses pseudo PIC register which is
42944 // uninitialized. Since FUNCTION is local and calling it
42945 // doesn't go through PLT, we use scratch register %r11 as
42946 // PIC register and initialize it here.
42947 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
42948 ix86_init_large_pic_reg (tmp_regno);
42949 fnaddr = legitimize_pic_address (fnaddr,
42950 gen_rtx_REG (Pmode, tmp_regno));
42953 if (!sibcall_insn_operand (fnaddr, word_mode))
42955 tmp = gen_rtx_REG (word_mode, tmp_regno);
42956 if (GET_MODE (fnaddr) != word_mode)
42957 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
42958 emit_move_insn (tmp, fnaddr);
42962 tmp = gen_rtx_MEM (QImode, fnaddr);
42963 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
42964 tmp = emit_call_insn (tmp);
42965 SIBLING_CALL_P (tmp) = 1;
42969 /* Emit just enough of rest_of_compilation to get the insns emitted.
42970 Note that use_thunk calls assemble_start_function et al. */
42971 insn = get_insns ();
42972 shorten_branches (insn);
42973 final_start_function (insn, file, 1);
42974 final (insn, file, 1);
42975 final_end_function ();
/* Emit per-file assembler prologue directives: .code16gcc (under an
   elided guard, presumably TARGET_16BIT), Darwin file start, optional
   .version, __fltused, and Intel syntax selection.  Sampled listing;
   braces and some guards elided.  */
42979 x86_file_start (void)
42981 default_file_start ();
42983 fputs ("\t.code16gcc\n", asm_out_file);
42985 darwin_file_start ();
42987 if (X86_FILE_START_VERSION_DIRECTIVE)
42988 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
42989 if (X86_FILE_START_FLTUSED)
42990 fputs ("\t.global\t__fltused\n", asm_out_file);
42991 if (ix86_asm_dialect == ASM_INTEL)
42992 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* Compute the alignment for FIELD given the COMPUTED default.  When
   neither 64-bit nor -malign-double (guard partially elided), cap the
   alignment of double/double-complex and integer-class fields at 32
   bits, matching the traditional ia32 ABI.  */
42996 x86_field_alignment (tree field, int computed)
42999 tree type = TREE_TYPE (field);
43001 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
43003 mode = TYPE_MODE (strip_array_types (type));
43004 if (mode == DFmode || mode == DCmode
43005 || GET_MODE_CLASS (mode) == MODE_INT
43006 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
43007 return MIN (32, computed);
43011 /* Print call to TARGET to FILE. */
/* With -mnop-mcount, emit a 5-byte nop of the same length as the call
   so the profiler call site can be patched in later.  */
43014 x86_print_call_or_nop (FILE *file, const char *target)
43016 if (flag_nop_mcount)
43017 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
43019 fprintf (file, "1:\tcall\t%s\n", target);
43022 /* Output assembler code to FILE to increment profiler label # LABELNO
43023 for profiling a function entry. */
/* NOTE(review): sampled listing -- the 64-bit/PIC/non-PIC branch guards
   are partially elided.  Emits the mcount call (direct, @GOT, or
   @GOTPCREL), optionally preceded by a profile-counter load, and a
   __mcount_loc record when -mrecord-mcount is on.  */
43025 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
43027 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
43031 #ifndef NO_PROFILE_COUNTERS
43032 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
43035 if (!TARGET_PECOFF && flag_pic)
43036 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
43038 x86_print_call_or_nop (file, mcount_name);
43042 #ifndef NO_PROFILE_COUNTERS
43043 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
43046 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
43050 #ifndef NO_PROFILE_COUNTERS
43051 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
43054 x86_print_call_or_nop (file, mcount_name);
43057 if (flag_record_mcount)
43059 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
43060 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
43061 fprintf (file, "\t.previous\n");
43065 /* We don't have exact information about the insn sizes, but we may assume
43066 quite safely that we are informed about all 1 byte insns and memory
43067 address sizes. This is enough to eliminate unnecessary padding in
/* NOTE(review): sampled listing -- returns, 'len'/'l' declarations, and
   a CALL_P guard are elided.  Returns a conservative minimum byte size
   for INSN used by the jump-mispredict padding pass below.  */
43071 min_insn_size (rtx_insn *insn)
43075 if (!INSN_P (insn) || !active_insn_p (insn))
43078 /* Discard alignments we've emit and jump instructions. */
43079 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
43080 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
43083 /* Important case - calls are always 5 bytes.
43084 It is common to have many calls in the row. */
43086 && symbolic_reference_mentioned_p (PATTERN (insn))
43087 && !SIBLING_CALL_P (insn))
43089 len = get_attr_length (insn);
43093 /* For normal instructions we rely on get_attr_length being exact,
43094 with a few exceptions. */
43095 if (!JUMP_P (insn))
43097 enum attr_type type = get_attr_type (insn);
43102 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
43103 || asm_noperands (PATTERN (insn)) >= 0)
43110 /* Otherwise trust get_attr_length. */
43114 l = get_attr_length_address (insn);
43115 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
43124 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43126 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Sliding-window pass: track the minimal interval [START, INSN] whose
   estimated size NBYTES is < 16 and which contains 4 jumps; when found,
   pad before INSN so the 4th jump cannot share a 16-byte fetch window
   with the other three.  (Sampled listing; some loop/brace lines
   elided.)  */
43130 ix86_avoid_jump_mispredicts (void)
43132 rtx_insn *insn, *start = get_insns ();
43133 int nbytes = 0, njumps = 0;
43134 bool isjump = false;
43136 /* Look for all minimal intervals of instructions containing 4 jumps.
43137 The intervals are bounded by START and INSN. NBYTES is the total
43138 size of instructions in the interval including INSN and not including
43139 START. When the NBYTES is smaller than 16 bytes, it is possible
43140 that the end of START and INSN ends up in the same 16byte page.
43142 The smallest offset in the page INSN can start is the case where START
43143 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
43144 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
43146 Don't consider asm goto as jump, while it can contain a jump, it doesn't
43147 have to, control transfer to label(s) can be performed through other
43148 means, and also we estimate minimum length of all asm stmts as 0. */
43149 for (insn = start; insn; insn = NEXT_INSN (insn))
43153 if (LABEL_P (insn))
43155 int align = label_to_alignment (insn);
43156 int max_skip = label_to_max_skip (insn);
43160 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
43161 already in the current 16 byte page, because otherwise
43162 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
43163 bytes to reach 16 byte boundary. */
43165 || (align <= 3 && max_skip != (1 << align) - 1))
43168 fprintf (dump_file, "Label %i with max_skip %i\n",
43169 INSN_UID (insn), max_skip);
/* Shrink the window from the front until an aligned label can no
   longer fall inside the same 16-byte page.  */
43172 while (nbytes + max_skip >= 16)
43174 start = NEXT_INSN (start);
43175 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43177 njumps--, isjump = true;
43180 nbytes -= min_insn_size (start);
43186 min_size = min_insn_size (insn);
43187 nbytes += min_size;
43189 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
43190 INSN_UID (insn), min_size);
43191 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
43199 start = NEXT_INSN (start);
43200 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43202 njumps--, isjump = true;
43205 nbytes -= min_insn_size (start);
43207 gcc_assert (njumps >= 0);
43209 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
43210 INSN_UID (start), INSN_UID (insn), nbytes);
43212 if (njumps == 3 && isjump && nbytes < 16)
43214 int padsize = 15 - nbytes + min_insn_size (insn);
43217 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
43218 INSN_UID (insn), padsize);
43219 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
43225 /* AMD Athlon works faster
43226 when RET is not destination of conditional jump or directly preceded
43227 by other jump instruction. We avoid the penalty by inserting NOP just
43228 before the RET instructions in such cases. */
/* NOTE(review): sampled listing -- the edge iterator declarations, the
   'replace' assignments, and the actual NOP/long-return replacement
   lines are partially elided.  For each return edge, decide whether the
   RET needs padding: it does when a label (non-fallthru predecessor)
   immediately precedes it, when a conditional jump precedes it, or when
   the function is empty.  */
43230 ix86_pad_returns (void)
43235 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43237 basic_block bb = e->src;
43238 rtx_insn *ret = BB_END (bb);
43240 bool replace = false;
43242 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
43243 || optimize_bb_for_size_p (bb))
/* Walk back to the nearest active insn or label before the RET.  */
43245 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
43246 if (active_insn_p (prev) || LABEL_P (prev))
43248 if (prev && LABEL_P (prev))
43253 FOR_EACH_EDGE (e, ei, bb->preds)
43254 if (EDGE_FREQUENCY (e) && e->src->index >= 0
43255 && !(e->flags & EDGE_FALLTHRU))
43263 prev = prev_active_insn (ret);
43265 && ((JUMP_P (prev) && any_condjump_p (prev))
43268 /* Empty functions get branch mispredict even when
43269 the jump destination is not visible to us. */
43270 if (!prev && !optimize_function_for_size_p (cfun))
43275 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43281 /* Count the minimum number of instructions in BB. Return 4 if the
43282 number of instructions >= 4. */
/* Counts only real insns (not USE/CLOBBER/debug); saturates at 4
   because callers only care whether a path has fewer than 4 insns.
   (Sampled listing; braces and increments partially elided.)  */
43285 ix86_count_insn_bb (basic_block bb)
43288 int insn_count = 0;
43290 /* Count number of instructions in this block. Return 4 if the number
43291 of instructions >= 4. */
43292 FOR_BB_INSNS (bb, insn)
43294 /* Only happen in exit blocks. */
43296 && ANY_RETURN_P (PATTERN (insn)))
43299 if (NONDEBUG_INSN_P (insn)
43300 && GET_CODE (PATTERN (insn)) != USE
43301 && GET_CODE (PATTERN (insn)) != CLOBBER)
43304 if (insn_count >= 4)
43313 /* Count the minimum number of instructions in code path in BB.
43314 Return 4 if the number of instructions >= 4. */
43317 ix86_count_insn (basic_block bb)
43321 int min_prev_count;
43323 /* Only bother counting instructions along paths with no
43324 more than 2 basic blocks between entry and exit. Given
43325 that BB has an edge to exit, determine if a predecessor
43326 of BB has an edge from entry. If so, compute the number
43327 of instructions in the predecessor block. If there
43328 happen to be multiple such blocks, compute the minimum. */
43329 min_prev_count = 4;
43330 FOR_EACH_EDGE (e, ei, bb->preds)
43333 edge_iterator prev_ei;
/* BB is directly reachable from entry: no predecessor insns count.  */
43335 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43337 min_prev_count = 0;
43340 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43342 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43344 int count = ix86_count_insn_bb (e->src);
43345 if (count < min_prev_count)
43346 min_prev_count = count;
/* Only add BB's own insns when the shortest predecessor path has not
   already reached the saturation value 4.  */
43352 if (min_prev_count < 4)
43353 min_prev_count += ix86_count_insn_bb (bb);
43355 return min_prev_count;
43358 /* Pad short function to 4 instructions. */
/* NOTE(review): sampled listing -- edge iterator declarations and some
   loop bounds are elided.  For every return edge whose path from entry
   has fewer than 4 insns, emit NOPs just before the epilogue note so
   the function body reaches the 4-instruction minimum
   (-mpad-short-functions, an Atom tuning).  */
43361 ix86_pad_short_function (void)
43366 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43368 rtx_insn *ret = BB_END (e->src);
43369 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43371 int insn_count = ix86_count_insn (e->src);
43373 /* Pad short function. */
43374 if (insn_count < 4)
43376 rtx_insn *insn = ret;
43378 /* Find epilogue. */
43381 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43382 insn = PREV_INSN (insn);
43387 /* Two NOPs count as one instruction. */
43388 insn_count = 2 * (4 - insn_count);
43389 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
43395 /* Fix up a Windows system unwinder issue. If an EH region falls through into
43396 the epilogue, the Windows system unwinder will apply epilogue logic and
43397 produce incorrect offsets. This can be avoided by adding a nop between
43398 the last insn that can throw and the first insn of the epilogue. */
43401 ix86_seh_fixup_eh_fallthru (void)
43406 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43408 rtx_insn *insn, *next;
43410 /* Find the beginning of the epilogue. */
43411 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43412 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43417 /* We only care about preceding insns that can throw. */
43418 insn = prev_active_insn (insn);
43419 if (insn == NULL || !can_throw_internal (insn))
43422 /* Do not separate calls from their debug information. */
43423 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43425 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43426 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
/* Insert the separating NOP after the last throwing insn (and its
   debug notes), before the epilogue begins.  */
43431 emit_insn_after (gen_nops (const1_rtx), insn);
43435 /* Implement machine specific optimizations. We implement padding of returns
43436 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* NOTE(review): the function header itself (presumably 'static void
   ix86_reorg (void)', the TARGET_MACHINE_DEPENDENT_REORG hook) is
   elided from this sampled listing -- only the body is visible.  */
43440 /* We are freeing block_for_insn in the toplev to keep compatibility
43441 with old MDEP_REORGS that are not CFG based. Recompute it now. */
43442 compute_bb_for_insn ();
43444 if (TARGET_SEH && current_function_has_exception_handlers ())
43445 ix86_seh_fixup_eh_fallthru ();
43447 if (optimize && optimize_function_for_speed_p (cfun))
43449 if (TARGET_PAD_SHORT_FUNCTION)
43450 ix86_pad_short_function ();
43451 else if (TARGET_PAD_RETURNS)
43452 ix86_pad_returns ();
43453 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43454 if (TARGET_FOUR_JUMP_LIMIT)
43455 ix86_avoid_jump_mispredicts ();
43460 /* Return nonzero when QImode register that must be represented via REX prefix
/* Scans the cached extracted operands of INSN for a general register
   that is not one of the legacy QI-addressable registers, i.e. a byte
   register only encodable with a REX prefix.  (Return statements are
   elided from this sampled listing.)  */
43463 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
43466 extract_insn_cached (insn);
43467 for (i = 0; i < recog_data.n_operands; i++)
43468 if (GENERAL_REG_P (recog_data.operand[i])
43469 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
43474 /* Return true when INSN mentions register that must be encoded using REX
/* Walks every sub-rtx of INSN's pattern looking for a REX integer or
   REX SSE register (r8-r15 / xmm8-xmm15 class).  (The REG_P test and
   return statements are elided from this sampled listing.)  */
43477 x86_extended_reg_mentioned_p (rtx insn)
43479 subrtx_iterator::array_type array;
43480 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43482 const_rtx x = *iter;
43484 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43490 /* If profitable, negate (without causing overflow) integer constant
43491 of mode MODE at location LOC. Return true in this case. */
/* NOTE(review): sampled listing -- the mode switch and return
   statements are elided.  Rewrites e.g. 'addl $-4' into the shorter /
   prettier 'subl $4' form; -128 is deliberately kept because -128
   encodes in one byte while +128 does not.  */
43493 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
43497 if (!CONST_INT_P (*loc))
43503 /* DImode x86_64 constants must fit in 32 bits. */
43504 gcc_assert (x86_64_immediate_operand (*loc, mode));
43515 gcc_unreachable ();
43518 /* Avoid overflows. */
/* Negating the sign-bit value would overflow; bail out.  */
43519 if (mode_signbit_p (mode, *loc))
43522 val = INTVAL (*loc);
43524 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
43525 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
43526 if ((val < 0 && val != -128)
43529 *loc = GEN_INT (-val);
43536 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
43537 optabs would emit if we didn't have TFmode patterns. */
/* Classic unsigned->float sequence: if the input is non-negative, a
   plain signed conversion is correct; otherwise halve the value
   ((in >> 1) | (in & 1), preserving the rounding bit), convert, and
   double the result with f0 + f0.  (Sampled listing; 'out' assignment
   and some jump/barrier lines elided.)  */
43540 x86_emit_floatuns (rtx operands[2])
43542 rtx_code_label *neglab, *donelab;
43543 rtx i0, i1, f0, in, out;
43544 machine_mode mode, inmode;
43546 inmode = GET_MODE (operands[1]);
43547 gcc_assert (inmode == SImode || inmode == DImode);
43550 in = force_reg (inmode, operands[1]);
43551 mode = GET_MODE (out);
43552 neglab = gen_label_rtx ();
43553 donelab = gen_label_rtx ();
43554 f0 = gen_reg_rtx (mode);
43556 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43558 expand_float (out, in, 0);
43560 emit_jump_insn (gen_jump (donelab));
43563 emit_label (neglab);
43565 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43567 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43569 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43571 expand_float (f0, i0, 0);
43573 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
43575 emit_label (donelab);
43578 static bool canonicalize_perm (struct expand_vec_perm_d *d);
43579 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43580 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43581 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43583 /* Get a vector mode of the same size as the original but with elements
43584 twice as wide. This is only guaranteed to apply to integral vectors. */
/* The asserts check the genmodes.c ordering assumption: the next wider
   vector mode has half the element count but the same byte size.
   (The 'return n;' line is elided from this sampled listing.)  */
43586 static inline machine_mode
43587 get_mode_wider_vector (machine_mode o)
43589 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
43590 machine_mode n = GET_MODE_WIDER_MODE (o);
43591 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43592 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43596 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
43597 fill target with val via vec_duplicate. */
/* Strategy: emit (set target (vec_duplicate val)) optimistically; if
   recog rejects it, force VAL into a register (emitting the forcing
   sequence just before the insn) and retry recognition.  (Sampled
   listing; declarations of dup/insn/seq/ok and returns are elided.)  */
43600 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
43606 /* First attempt to recognize VAL as-is. */
43607 dup = gen_rtx_VEC_DUPLICATE (mode, val);
43608 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
43609 if (recog_memoized (insn) < 0)
43612 /* If that fails, force VAL into a register. */
43615 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
43616 seq = get_insns ();
43619 emit_insn_before (seq, insn);
43621 ok = recog_memoized (insn) >= 0;
43627 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43628 with all elements equal to VAR. Return true if successful. */
/* NOTE(review): sampled listing -- the mode switch labels and many case
   guards are elided, so the per-mode dispatch cannot be fully read
   here.  Visible strategies: direct vec_duplicate; TRUNCATE trick for
   MMX HImode; pshufb-style permute via expand_vec_perm for QI/HI
   elements; widen-element-and-recurse; and build-half-then-VEC_CONCAT
   for 256/512-bit modes.  */
43631 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
43632 rtx target, rtx val)
43656 return ix86_vector_duplicate_value (mode, target, val);
43661 if (TARGET_SSE || TARGET_3DNOW_A)
43665 val = gen_lowpart (SImode, val);
43666 x = gen_rtx_TRUNCATE (HImode, val);
43667 x = gen_rtx_VEC_DUPLICATE (mode, x);
43668 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43680 return ix86_vector_duplicate_value (mode, target, val);
43684 struct expand_vec_perm_d dperm;
43688 memset (&dperm, 0, sizeof (dperm));
43689 dperm.target = target;
43690 dperm.vmode = mode;
43691 dperm.nelt = GET_MODE_NUNITS (mode);
43692 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
43693 dperm.one_operand_p = true;
43695 /* Extend to SImode using a paradoxical SUBREG. */
43696 tmp1 = gen_reg_rtx (SImode);
43697 emit_move_insn (tmp1, gen_lowpart (SImode, val));
43699 /* Insert the SImode value as low element of a V4SImode vector. */
43700 tmp2 = gen_reg_rtx (V4SImode);
43701 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
43702 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
43704 ok = (expand_vec_perm_1 (&dperm)
43705 || expand_vec_perm_broadcast_1 (&dperm));
43713 return ix86_vector_duplicate_value (mode, target, val);
43720 /* Replicate the value once into the next wider mode and recurse. */
43722 machine_mode smode, wsmode, wvmode;
43725 smode = GET_MODE_INNER (mode);
43726 wvmode = get_mode_wider_vector (mode);
43727 wsmode = GET_MODE_INNER (wvmode);
/* Build the doubled scalar (val | val << bits) so one wider element
   holds two copies of the narrow value, then recurse on the wider
   vector mode.  */
43729 val = convert_modes (wsmode, smode, val, true);
43730 x = expand_simple_binop (wsmode, ASHIFT, val,
43731 GEN_INT (GET_MODE_BITSIZE (smode)),
43732 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43733 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
43735 x = gen_reg_rtx (wvmode);
43736 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
43738 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
43745 return ix86_vector_duplicate_value (mode, target, val);
43748 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
43749 rtx x = gen_reg_rtx (hvmode);
43751 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43754 x = gen_rtx_VEC_CONCAT (mode, x, x);
43755 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43761 if (TARGET_AVX512BW)
43762 return ix86_vector_duplicate_value (mode, target, val);
43765 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
43766 rtx x = gen_reg_rtx (hvmode);
43768 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43771 x = gen_rtx_VEC_CONCAT (mode, x, x);
43772 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43781 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43782 whose ONE_VAR element is VAR, and other elements are zero. Return true
/* NOTE(review): sampled listing -- the mode switch labels and several
   fall-through/return lines are elided.  Visible strategies: (1) zero
   the target and use ix86_expand_vector_set when the ISA has a suitable
   vector-set insn, (2) VEC_CONCAT with zero for 2-element vectors,
   (3) vec_merge a duplicate with zero then shuffle the live lane into
   position ONE_VAR via pshufd (SSE2) or shufps (SSE1), (4) widen QI/HI
   elements to SImode and recurse.  */
43786 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
43787 rtx target, rtx var, int one_var)
43789 machine_mode vsimode;
43792 bool use_vector_set = false;
43797 /* For SSE4.1, we normally use vector set. But if the second
43798 element is zero and inter-unit moves are OK, we use movq
43800 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
43801 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
43807 use_vector_set = TARGET_SSE4_1;
43810 use_vector_set = TARGET_SSE2;
43813 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
43820 use_vector_set = TARGET_AVX;
43823 /* Use ix86_expand_vector_set in 64bit mode only. */
43824 use_vector_set = TARGET_AVX && TARGET_64BIT;
43830 if (use_vector_set)
43832 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
43833 var = force_reg (GET_MODE_INNER (mode), var);
43834 ix86_expand_vector_set (mmx_ok, target, var, one_var);
43850 var = force_reg (GET_MODE_INNER (mode), var);
43851 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
43852 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* Work in a fresh pseudo when TARGET is not a pseudo register, since
   the shuffle below reads and writes the same operand.  */
43857 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
43858 new_target = gen_reg_rtx (mode);
43860 new_target = target;
43861 var = force_reg (GET_MODE_INNER (mode), var);
43862 x = gen_rtx_VEC_DUPLICATE (mode, var);
43863 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
43864 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
43867 /* We need to shuffle the value to the correct position, so
43868 create a new pseudo to store the intermediate result. */
43870 /* With SSE2, we can use the integer shuffle insns. */
43871 if (mode != V4SFmode && TARGET_SSE2)
43873 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
43875 GEN_INT (one_var == 1 ? 0 : 1),
43876 GEN_INT (one_var == 2 ? 0 : 1),
43877 GEN_INT (one_var == 3 ? 0 : 1)));
43878 if (target != new_target)
43879 emit_move_insn (target, new_target);
43883 /* Otherwise convert the intermediate result to V4SFmode and
43884 use the SSE1 shuffle instructions. */
43885 if (mode != V4SFmode)
43887 tmp = gen_reg_rtx (V4SFmode);
43888 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
43893 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
43895 GEN_INT (one_var == 1 ? 0 : 1),
43896 GEN_INT (one_var == 2 ? 0+4 : 1+4),
43897 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
43899 if (mode != V4SFmode)
43900 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
43901 else if (tmp != target)
43902 emit_move_insn (target, tmp);
43904 else if (target != new_target)
43905 emit_move_insn (target, new_target);
43910 vsimode = V4SImode;
43916 vsimode = V2SImode;
43922 /* Zero extend the variable element to SImode and recurse. */
43923 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
43925 x = gen_reg_rtx (vsimode);
43926 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
43928 gcc_unreachable ();
43930 emit_move_insn (target, gen_lowpart (mode, x));
43938 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
43939 consisting of the values in VALS. It is known that all elements
43940 except ONE_VAR are constants. Return true if successful. */
43943 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
43944 rtx target, rtx vals, int one_var)
43946 rtx var = XVECEXP (vals, 0, one_var);
43947 machine_mode wmode;
/* Build an all-constant copy of VALS with the one variable slot
   zeroed, so it can be emitted as a CONST_VECTOR (constant pool).  */
43950 const_vec = copy_rtx (vals);
43951 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
43952 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
43960 /* For the two element vectors, it's just as easy to use
43961 the general case. */
43965 /* Use ix86_expand_vector_set in 64bit mode only. */
43988 /* There's no way to set one QImode entry easily. Combine
43989 the variable value with its adjacent constant value, and
43990 promote to an HImode set. */
/* The adjacent element (one_var ^ 1) is known constant; merge it with
   VAR into a single HImode value, shifting whichever half is high.  */
43991 x = XVECEXP (vals, 0, one_var ^ 1)
43994 var = convert_modes (HImode, QImode, var, true);
43995 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
43996 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43997 x = GEN_INT (INTVAL (x) & 0xff);
44001 var = convert_modes (HImode, QImode, var, true);
44002 x = gen_int_mode (INTVAL (x) << 8, HImode);
44004 if (x != const0_rtx)
44005 var = expand_simple_binop (HImode, IOR, var, x, var,
44006 1, OPTAB_LIB_WIDEN);
/* Do the insertion in the wider (HImode-element) mode, then view the
   result back in the original mode.  */
44008 x = gen_reg_rtx (wmode);
44009 emit_move_insn (x, gen_lowpart (wmode, const_vec));
44010 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
44012 emit_move_insn (target, gen_lowpart (mode, x));
/* Generic fallback: load the constant vector, then overwrite the one
   variable element in place.  */
44019 emit_move_insn (target, const_vec);
44020 ix86_expand_vector_set (mmx_ok, target, var, one_var);
44024 /* A subroutine of ix86_expand_vector_init_general.  Use vector
44025 concatenate to handle the most general case: all values variable,
44026 and none identical. */
44029 ix86_expand_vector_init_concat (machine_mode mode,
44030 rtx target, rtx *ops, int n)
44032 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
44033 rtx first[16], second[8], third[4];
44085 gcc_unreachable ();
/* N == 2: concatenate the two operands directly; both must be in
   registers for the VEC_CONCAT pattern.  */
44088 if (!register_operand (ops[1], cmode))
44089 ops[1] = force_reg (cmode, ops[1]);
44090 if (!register_operand (ops[0], cmode))
44091 ops[0] = force_reg (cmode, ops[0]);
44092 emit_insn (gen_rtx_SET (VOIDmode, target,
44093 gen_rtx_VEC_CONCAT (mode, ops[0],
44113 gcc_unreachable ();
44137 gcc_unreachable ();
44155 gcc_unreachable ();
44160 /* FIXME: We process inputs backward to help RA. PR 36222. */
/* Pair up adjacent scalars into CMODE two-element vectors.  */
44163 for (; i > 0; i -= 2, j--)
44165 first[j] = gen_reg_rtx (cmode);
44166 v = gen_rtvec (2, ops[i - 1], ops[i]);
44167 ix86_expand_vector_init (false, first[j],
44168 gen_rtx_PARALLEL (cmode, v));
/* Recurse: concatenate pairs into HMODE halves, then (for the widest
   vectors) HMODE halves into GMODE quarters-of-target, and finally
   into TARGET itself.  */
44174 gcc_assert (hmode != VOIDmode);
44175 gcc_assert (gmode != VOIDmode);
44176 for (i = j = 0; i < n; i += 2, j++)
44178 second[j] = gen_reg_rtx (hmode);
44179 ix86_expand_vector_init_concat (hmode, second [j],
44183 for (i = j = 0; i < n; i += 2, j++)
44185 third[j] = gen_reg_rtx (gmode);
44186 ix86_expand_vector_init_concat (gmode, third[j],
44190 ix86_expand_vector_init_concat (mode, target, third, n);
/* Two-level variant: pairs -> HMODE halves -> TARGET.  */
44194 gcc_assert (hmode != VOIDmode);
44195 for (i = j = 0; i < n; i += 2, j++)
44197 second[j] = gen_reg_rtx (hmode);
44198 ix86_expand_vector_init_concat (hmode, second [j],
44202 ix86_expand_vector_init_concat (mode, target, second, n);
/* Single-level variant: the CMODE pairs already form TARGET.  */
44205 ix86_expand_vector_init_concat (mode, target, first, n);
44209 gcc_unreachable ();
44213 /* A subroutine of ix86_expand_vector_init_general.  Use vector
44214 interleave to handle the most general case: all values variable,
44215 and none identical. */
44218 ix86_expand_vector_init_interleave (machine_mode mode,
44219 rtx target, rtx *ops, int n)
44221 machine_mode first_imode, second_imode, third_imode, inner_mode;
44224 rtx (*gen_load_even) (rtx, rtx, rtx);
44225 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
44226 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
/* V8HImode case: build via SI/DI interleaves.  */
44231 gen_load_even = gen_vec_setv8hi;
44232 gen_interleave_first_low = gen_vec_interleave_lowv4si;
44233 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44234 inner_mode = HImode;
44235 first_imode = V4SImode;
44236 second_imode = V2DImode;
44237 third_imode = VOIDmode;
/* V16QImode case: one extra interleave level is needed.  */
44240 gen_load_even = gen_vec_setv16qi;
44241 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
44242 gen_interleave_second_low = gen_vec_interleave_lowv4si;
44243 inner_mode = QImode;
44244 first_imode = V8HImode;
44245 second_imode = V4SImode;
44246 third_imode = V2DImode;
44249 gcc_unreachable ();
/* For each pair of scalars, put the odd one in element 0 of a vector
   and insert the even one as element 1.  */
44252 for (i = 0; i < n; i++)
44254 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
44255 op0 = gen_reg_rtx (SImode);
44256 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
44258 /* Insert the SImode value as low element of V4SImode vector. */
44259 op1 = gen_reg_rtx (V4SImode);
44260 op0 = gen_rtx_VEC_MERGE (V4SImode,
44261 gen_rtx_VEC_DUPLICATE (V4SImode,
44263 CONST0_RTX (V4SImode),
44265 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
44267 /* Cast the V4SImode vector back to a vector in orignal mode. */
44268 op0 = gen_reg_rtx (mode);
44269 emit_move_insn (op0, gen_lowpart (mode, op1));
44271 /* Load even elements into the second position. */
44272 emit_insn (gen_load_even (op0,
44273 force_reg (inner_mode,
44277 /* Cast vector to FIRST_IMODE vector. */
44278 ops[i] = gen_reg_rtx (first_imode);
44279 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44282 /* Interleave low FIRST_IMODE vectors. */
44283 for (i = j = 0; i < n; i += 2, j++)
44285 op0 = gen_reg_rtx (first_imode);
44286 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44288 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
44289 ops[j] = gen_reg_rtx (second_imode);
44290 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44293 /* Interleave low SECOND_IMODE vectors. */
44294 switch (second_imode)
44297 for (i = j = 0; i < n / 2; i += 2, j++)
44299 op0 = gen_reg_rtx (second_imode);
44300 emit_insn (gen_interleave_second_low (op0, ops[i],
44303 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44305 ops[j] = gen_reg_rtx (third_imode);
44306 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
/* After the V4SI pass, fall through to a final V2DI interleave.  */
44308 second_imode = V2DImode;
44309 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44313 op0 = gen_reg_rtx (second_imode);
44314 emit_insn (gen_interleave_second_low (op0, ops[0],
44317 /* Cast the SECOND_IMODE vector back to a vector on original
44319 emit_insn (gen_rtx_SET (VOIDmode, target,
44320 gen_lowpart (mode, op0)));
44324 gcc_unreachable ();
44328 /* A subroutine of ix86_expand_vector_init.  Handle the most general case:
44329 all values variable, and none identical. */
44332 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44333 rtx target, rtx vals)
44335 rtx ops[64], op0, op1, op2, op3, op4, op5;
44336 machine_mode half_mode = VOIDmode;
44337 machine_mode quarter_mode = VOIDmode;
44344 if (!mmx_ok && !TARGET_SSE)
/* Word-size and larger elements: straight concatenation.  */
44360 n = GET_MODE_NUNITS (mode);
44361 for (i = 0; i < n; i++)
44362 ops[i] = XVECEXP (vals, 0, i);
44363 ix86_expand_vector_init_concat (mode, target, ops, n);
/* 256-bit QI/HI vectors: build two 128-bit halves by interleave,
   then concatenate them.  */
44367 half_mode = V16QImode;
44371 half_mode = V8HImode;
44375 n = GET_MODE_NUNITS (mode);
44376 for (i = 0; i < n; i++)
44377 ops[i] = XVECEXP (vals, 0, i);
44378 op0 = gen_reg_rtx (half_mode);
44379 op1 = gen_reg_rtx (half_mode);
44380 ix86_expand_vector_init_interleave (half_mode, op0, ops,
44382 ix86_expand_vector_init_interleave (half_mode, op1,
44383 &ops [n >> 1], n >> 2);
44384 emit_insn (gen_rtx_SET (VOIDmode, target,
44385 gen_rtx_VEC_CONCAT (mode, op0, op1)));
/* 512-bit QI/HI vectors: four 128-bit quarters, concatenated
   pairwise into two halves, then into the full vector.  */
44389 quarter_mode = V16QImode;
44390 half_mode = V32QImode;
44394 quarter_mode = V8HImode;
44395 half_mode = V16HImode;
44399 n = GET_MODE_NUNITS (mode);
44400 for (i = 0; i < n; i++)
44401 ops[i] = XVECEXP (vals, 0, i);
44402 op0 = gen_reg_rtx (quarter_mode);
44403 op1 = gen_reg_rtx (quarter_mode);
44404 op2 = gen_reg_rtx (quarter_mode);
44405 op3 = gen_reg_rtx (quarter_mode);
44406 op4 = gen_reg_rtx (half_mode);
44407 op5 = gen_reg_rtx (half_mode);
44408 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44410 ix86_expand_vector_init_interleave (quarter_mode, op1,
44411 &ops [n >> 2], n >> 3);
44412 ix86_expand_vector_init_interleave (quarter_mode, op2,
44413 &ops [n >> 1], n >> 3);
44414 ix86_expand_vector_init_interleave (quarter_mode, op3,
44415 &ops [(n >> 1) | (n >> 2)], n >> 3);
44416 emit_insn (gen_rtx_SET (VOIDmode, op4,
44417 gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44418 emit_insn (gen_rtx_SET (VOIDmode, op5,
44419 gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44420 emit_insn (gen_rtx_SET (VOIDmode, target,
44421 gen_rtx_VEC_CONCAT (mode, op4, op5)));
44425 if (!TARGET_SSE4_1)
44433 /* Don't use ix86_expand_vector_init_interleave if we can't
44434 move from GPR to SSE register directly. */
44435 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44438 n = GET_MODE_NUNITS (mode);
44439 for (i = 0; i < n; i++)
44440 ops[i] = XVECEXP (vals, 0, i);
44441 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44449 gcc_unreachable ();
/* Scalar fallback: pack elements into word_mode integers with
   shift/or, then assemble the vector from those words.  */
44453 int i, j, n_elts, n_words, n_elt_per_word;
44454 machine_mode inner_mode;
44455 rtx words[4], shift;
44457 inner_mode = GET_MODE_INNER (mode);
44458 n_elts = GET_MODE_NUNITS (mode);
44459 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44460 n_elt_per_word = n_elts / n_words;
44461 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
44463 for (i = 0; i < n_words; ++i)
44465 rtx word = NULL_RTX;
/* Elements are folded in from the top down so the first element of
   the word ends up in the low bits.  */
44467 for (j = 0; j < n_elt_per_word; ++j)
44469 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44470 elt = convert_modes (word_mode, inner_mode, elt, true);
44476 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44477 word, 1, OPTAB_LIB_WIDEN);
44478 word = expand_simple_binop (word_mode, IOR, word, elt,
44479 word, 1, OPTAB_LIB_WIDEN);
44487 emit_move_insn (target, gen_lowpart (mode, words[0]));
44488 else if (n_words == 2)
44490 rtx tmp = gen_reg_rtx (mode);
44491 emit_clobber (tmp);
44492 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44493 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44494 emit_move_insn (target, tmp);
44496 else if (n_words == 4)
/* Four words: recurse as a V4SImode init (only valid when
   word_mode is SImode, i.e. 32-bit targets).  */
44498 rtx tmp = gen_reg_rtx (V4SImode);
44499 gcc_assert (word_mode == SImode);
44500 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44501 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44502 emit_move_insn (target, gen_lowpart (mode, tmp));
44505 gcc_unreachable ();
44509 /* Initialize vector TARGET via VALS. Suppress the use of MMX
44510 instructions unless MMX_OK is true. */
44513 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44515 machine_mode mode = GET_MODE (target);
44516 machine_mode inner_mode = GET_MODE_INNER (mode);
44517 int n_elts = GET_MODE_NUNITS (mode);
44518 int n_var = 0, one_var = -1;
44519 bool all_same = true, all_const_zero = true;
/* Classify the initializer: count non-constant elements (remembering
   the last one), and detect all-zero and all-identical vectors.  */
44523 for (i = 0; i < n_elts; ++i)
44525 x = XVECEXP (vals, 0, i);
44526 if (!(CONST_INT_P (x)
44527 || GET_CODE (x) == CONST_DOUBLE
44528 || GET_CODE (x) == CONST_FIXED))
44529 n_var++, one_var = i;
44530 else if (x != CONST0_RTX (inner_mode))
44531 all_const_zero = false;
44532 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44536 /* Constants are best loaded from the constant pool. */
44539 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44543 /* If all values are identical, broadcast the value. */
44545 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44546 XVECEXP (vals, 0, 0)))
44549 /* Values where only one field is non-constant are best loaded from
44550 the pool and overwritten via move later. */
44554 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44555 XVECEXP (vals, 0, one_var),
44559 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* Fully general fallback: all (or several) elements variable.  */
44563 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector TARGET.  MMX insns are
   avoided unless MMX_OK.  Strategy is chosen per mode: VEC_MERGE where
   an insert instruction exists, shuffles otherwise, and a stack
   spill/reload as the last resort.  */
44567 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44569 machine_mode mode = GET_MODE (target);
44570 machine_mode inner_mode = GET_MODE_INNER (mode);
44571 machine_mode half_mode;
44572 bool use_vec_merge = false;
/* Tables of lo/hi 128-bit lane extract/insert generators for the
   256-bit modes, indexed by [mode row][hi lane].  */
44574 static rtx (*gen_extract[6][2]) (rtx, rtx)
44576 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44577 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44578 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44579 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44580 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44581 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44583 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44585 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44586 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44587 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44588 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44589 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44590 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
/* Two-element vectors: extract the other element and rebuild the
   vector with VEC_CONCAT in the right order.  */
44600 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44601 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
44603 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44605 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44606 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44612 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
44616 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44617 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
44619 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44621 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44622 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44629 /* For the two element vectors, we implement a VEC_CONCAT with
44630 the extraction of the other element. */
44632 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
44633 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
44636 op0 = val, op1 = tmp;
44638 op0 = tmp, op1 = val;
44640 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
44641 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44646 use_vec_merge = TARGET_SSE4_1;
44653 use_vec_merge = true;
/* V4SFmode, elt == 1: interleave then shufps to restore order.  */
44657 /* tmp = target = A B C D */
44658 tmp = copy_to_reg (target);
44659 /* target = A A B B */
44660 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
44661 /* target = X A B B */
44662 ix86_expand_vector_set (false, target, val, 0);
44663 /* target = A X C D */
44664 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44665 const1_rtx, const0_rtx,
44666 GEN_INT (2+4), GEN_INT (3+4)));
/* Set element 0 of a copy, then blend it back with shufps.  */
44670 /* tmp = target = A B C D */
44671 tmp = copy_to_reg (target);
44672 /* tmp = X B C D */
44673 ix86_expand_vector_set (false, tmp, val, 0);
44674 /* target = A B X D */
44675 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44676 const0_rtx, const1_rtx,
44677 GEN_INT (0+4), GEN_INT (3+4)));
44681 /* tmp = target = A B C D */
44682 tmp = copy_to_reg (target);
44683 /* tmp = X B C D */
44684 ix86_expand_vector_set (false, tmp, val, 0);
44685 /* target = A B X D */
44686 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44687 const0_rtx, const1_rtx,
44688 GEN_INT (2+4), GEN_INT (0+4)));
44692 gcc_unreachable ();
44697 use_vec_merge = TARGET_SSE4_1;
44701 /* Element 0 handled by vec_merge below. */
44704 use_vec_merge = true;
44710 /* With SSE2, use integer shuffles to swap element 0 and ELT,
44711 store into element 0, then shuffle them back. */
44715 order[0] = GEN_INT (elt);
44716 order[1] = const1_rtx;
44717 order[2] = const2_rtx;
44718 order[3] = GEN_INT (3);
44719 order[elt] = const0_rtx;
44721 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44722 order[1], order[2], order[3]));
44724 ix86_expand_vector_set (false, target, val, 0);
/* The same ORDER permutation is its own inverse (a swap), so apply
   it again to put the elements back.  */
44726 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44727 order[1], order[2], order[3]));
44731 /* For SSE1, we have to reuse the V4SF code. */
44732 rtx t = gen_reg_rtx (V4SFmode);
44733 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
44734 emit_move_insn (target, gen_lowpart (mode, t));
44739 use_vec_merge = TARGET_SSE2;
44742 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44746 use_vec_merge = TARGET_SSE4_1;
/* 256-bit modes: select the 128-bit half mode, extract the half
   containing ELT, set within it, and insert it back.  */
44753 half_mode = V16QImode;
44759 half_mode = V8HImode;
44765 half_mode = V4SImode;
44771 half_mode = V2DImode;
44777 half_mode = V4SFmode;
44783 half_mode = V2DFmode;
44789 /* Compute offset. */
44793 gcc_assert (i <= 1);
44795 /* Extract the half. */
44796 tmp = gen_reg_rtx (half_mode);
44797 emit_insn (gen_extract[j][i] (tmp, target));
44799 /* Put val in tmp at elt. */
44800 ix86_expand_vector_set (false, tmp, val, elt);
44803 emit_insn (gen_insert[j][i] (target, target, tmp));
/* 512-bit modes: broadcast VAL and masked-blend the one lane via the
   AVX-512 blendm patterns; mask register holds 1 << elt.  */
44807 if (TARGET_AVX512F)
44809 tmp = gen_reg_rtx (mode);
44810 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44811 gen_rtx_VEC_DUPLICATE (mode, val)));
44812 emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
44813 force_reg (QImode, GEN_INT (1 << elt))));
44819 if (TARGET_AVX512F)
44821 tmp = gen_reg_rtx (mode);
44822 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44823 gen_rtx_VEC_DUPLICATE (mode, val)));
44824 emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
44825 force_reg (QImode, GEN_INT (1 << elt))));
44831 if (TARGET_AVX512F)
44833 tmp = gen_reg_rtx (mode);
44834 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44835 gen_rtx_VEC_DUPLICATE (mode, val)));
44836 emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
44837 force_reg (HImode, GEN_INT (1 << elt))));
44843 if (TARGET_AVX512F)
44845 tmp = gen_reg_rtx (mode);
44846 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44847 gen_rtx_VEC_DUPLICATE (mode, val)));
44848 emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
44849 force_reg (HImode, GEN_INT (1 << elt))));
44855 if (TARGET_AVX512F && TARGET_AVX512BW)
44857 tmp = gen_reg_rtx (mode);
44858 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44859 gen_rtx_VEC_DUPLICATE (mode, val)));
44860 emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
44861 force_reg (SImode, GEN_INT (1 << elt))));
44867 if (TARGET_AVX512F && TARGET_AVX512BW)
44869 tmp = gen_reg_rtx (mode);
44870 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44871 gen_rtx_VEC_DUPLICATE (mode, val)));
44872 emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
44873 force_reg (DImode, GEN_INT (1 << elt))));
/* Generic VEC_MERGE path: duplicate VAL and merge the ELT lane.  */
44885 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
44886 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
44887 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last resort: spill to a stack slot, store the scalar at the
   element's byte offset, and reload the whole vector.  */
44891 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
44893 emit_move_insn (mem, target);
44895 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
44896 emit_move_insn (tmp, val);
44898 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET.  MMX insns
   are avoided unless MMX_OK.  Uses vec_extract patterns where
   available, shuffles otherwise, and a stack spill as last resort.  */
44903 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
44905 machine_mode mode = GET_MODE (vec);
44906 machine_mode inner_mode = GET_MODE_INNER (mode);
44907 bool use_vec_extr = false;
44920 use_vec_extr = true;
44924 use_vec_extr = TARGET_SSE4_1;
/* V4SF without SSE4.1: shuffle the wanted element into lane 0
   first, then extract lane 0.  */
44936 tmp = gen_reg_rtx (mode);
44937 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
44938 GEN_INT (elt), GEN_INT (elt),
44939 GEN_INT (elt+4), GEN_INT (elt+4)));
44943 tmp = gen_reg_rtx (mode);
44944 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
44948 gcc_unreachable ();
44951 use_vec_extr = true;
44956 use_vec_extr = TARGET_SSE4_1;
/* V4SI without SSE4.1: pshufd the element to lane 0.  */
44970 tmp = gen_reg_rtx (mode);
44971 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
44972 GEN_INT (elt), GEN_INT (elt),
44973 GEN_INT (elt), GEN_INT (elt)));
44977 tmp = gen_reg_rtx (mode);
44978 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
44982 gcc_unreachable ();
44985 use_vec_extr = true;
44990 /* For SSE1, we have to reuse the V4SF code. */
44991 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
44992 gen_lowpart (V4SFmode, vec), elt);
44998 use_vec_extr = TARGET_SSE2;
45001 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45005 use_vec_extr = TARGET_SSE4_1;
/* 256-bit modes: extract the 128-bit half holding ELT, then recurse
   with the element index masked to within the half.  */
45011 tmp = gen_reg_rtx (V4SFmode);
45013 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
45015 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
45016 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45024 tmp = gen_reg_rtx (V2DFmode);
45026 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
45028 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
45029 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45037 tmp = gen_reg_rtx (V16QImode);
45039 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
45041 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
45042 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45050 tmp = gen_reg_rtx (V8HImode);
45052 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
45054 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
45055 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45063 tmp = gen_reg_rtx (V4SImode);
45065 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
45067 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
45068 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45076 tmp = gen_reg_rtx (V2DImode);
45078 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
45080 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
45081 ix86_expand_vector_extract (false, target, tmp, elt & 1);
/* 512-bit modes: same halving strategy down to 256-bit vectors.
   QI/HI element modes additionally require AVX512BW.  */
45087 if (TARGET_AVX512BW)
45089 tmp = gen_reg_rtx (V16HImode);
45091 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
45093 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
45094 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45100 if (TARGET_AVX512BW)
45102 tmp = gen_reg_rtx (V32QImode);
45104 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
45106 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
45107 ix86_expand_vector_extract (false, target, tmp, elt & 31);
45113 tmp = gen_reg_rtx (V8SFmode);
45115 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
45117 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
45118 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45122 tmp = gen_reg_rtx (V4DFmode);
45124 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
45126 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
45127 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45131 tmp = gen_reg_rtx (V8SImode);
45133 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
45135 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
45136 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45140 tmp = gen_reg_rtx (V4DImode);
45142 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
45144 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
45145 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45149 /* ??? Could extract the appropriate HImode element and shift. */
/* vec_extract path: emit a VEC_SELECT of the requested lane.  */
45156 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
45157 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
45159 /* Let the rtl optimizers know about the zero extension performed. */
45160 if (inner_mode == QImode || inner_mode == HImode)
45162 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
45163 target = gen_lowpart (SImode, target);
45166 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last resort: spill the vector to the stack and load the element
   from its byte offset.  */
45170 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45172 emit_move_insn (mem, vec);
45174 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45175 emit_move_insn (target, tmp);
45179 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
45180 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
45181 The upper bits of DEST are undefined, though they shouldn't cause
45182 exceptions (some bits from src or all zeros are ok). */
45185 emit_reduc_half (rtx dest, rtx src, int i)
45188 switch (GET_MODE (src))
/* 128-bit FP modes: movhlps/shufps/unpckhpd move the high half
   down.  */
45192 tem = gen_sse_movhlps (dest, src, src);
45194 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
45195 GEN_INT (1 + 4), GEN_INT (1 + 4));
45198 tem = gen_vec_interleave_highv2df (dest, src, src);
/* 128-bit integer modes: shift the whole V1TI register right.  */
45204 d = gen_reg_rtx (V1TImode);
45205 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
/* 256-bit FP: vperm2f128 for the 128-bit step, in-lane shuffles for
   smaller steps.  */
45210 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
45212 tem = gen_avx_shufps256 (dest, src, src,
45213 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
45217 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
45219 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
/* 256-bit integer: vpermq / V2TI shift through a V4DI/V2TI view.  */
45227 if (GET_MODE (dest) != V4DImode)
45228 d = gen_reg_rtx (V4DImode)
45229 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
45230 gen_lowpart (V4DImode, src),
45235 d = gen_reg_rtx (V2TImode);
45236 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
/* 512-bit: shuf_i32x4 for the 256/512-bit steps, pshufd for the
   within-lane steps, all on a V16SI view of the operands.  */
45247 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
45248 gen_lowpart (V16SImode, src),
45249 gen_lowpart (V16SImode, src),
45250 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
45251 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
45252 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
45253 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
45254 GEN_INT (0xC), GEN_INT (0xD),
45255 GEN_INT (0xE), GEN_INT (0xF),
45256 GEN_INT (0x10), GEN_INT (0x11),
45257 GEN_INT (0x12), GEN_INT (0x13),
45258 GEN_INT (0x14), GEN_INT (0x15),
45259 GEN_INT (0x16), GEN_INT (0x17));
45261 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45262 gen_lowpart (V16SImode, src),
45263 GEN_INT (i == 128 ? 0x2 : 0x1),
45267 GEN_INT (i == 128 ? 0x6 : 0x5),
45271 GEN_INT (i == 128 ? 0xA : 0x9),
45275 GEN_INT (i == 128 ? 0xE : 0xD),
45281 gcc_unreachable ();
/* When a temporary D in a different mode was used, copy the result
   back into DEST's mode.  */
45285 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
45288 /* Expand a vector reduction.  FN is the binary pattern to reduce;
45289 DEST is the destination; IN is the input vector. */
45292 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45294 rtx half, dst, vec = in;
45295 machine_mode mode = GET_MODE (in);
45298 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
45300 && mode == V8HImode
45301 && fn == gen_uminv8hi3
45303 emit_insn (gen_sse4_1_phminposuw (dest, in));
/* Log2 halving loop: repeatedly fold the high half onto the low half
   with FN until a single element's width remains.  */
45307 for (i = GET_MODE_BITSIZE (mode);
45308 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
45311 half = gen_reg_rtx (mode);
45312 emit_reduc_half (half, vec, i);
/* On the final iteration, accumulate directly into DEST.  */
45313 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
45316 dst = gen_reg_rtx (mode);
45317 emit_insn (fn (dst, half, vec));
45322 /* Target hook for scalar_mode_supported_p. */
45324 ix86_scalar_mode_supported_p (machine_mode mode)
/* Decimal float support is delegated to the generic helper; TFmode
   (__float128) has its own answer (elided here); everything else
   uses the default hook.  */
45326 if (DECIMAL_FLOAT_MODE_P (mode))
45327 return default_decimal_float_supported_p ();
45328 else if (mode == TFmode)
45331 return default_scalar_mode_supported_p (mode);
45334 /* Implements target hook vector_mode_supported_p. */
45336 ix86_vector_mode_supported_p (machine_mode mode)
/* A vector mode is supported when any enabled ISA level provides
   registers for it, checked from SSE up through AVX-512, then the
   MMX/3DNow! legacy modes.  */
45338 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45340 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45342 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45344 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45346 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45348 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
45353 /* Implement target hook libgcc_floating_mode_supported_p. */
45355 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
/* TFmode availability in libgcc is configuration-dependent, keyed
   off the IX86_*_LIBGCC_TFMODE macros set by the OS port.  */
45365 #ifdef IX86_NO_LIBGCC_TFMODE
45367 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45368 return TARGET_LONG_DOUBLE_128;
45378 /* Target hook for c_mode_for_suffix. */
/* Maps a constant-suffix character to a machine mode; body elided in
   this excerpt.  */
45379 static machine_mode
45380 ix86_c_mode_for_suffix (char suffix)
45390 /* Worker function for TARGET_MD_ASM_CLOBBERS.
45392 We do this in the new i386 backend to maintain source compatibility
45393 with the old cc0-based compiler. */
45396 ix86_md_asm_clobbers (tree, tree, tree clobbers)
/* Every asm implicitly clobbers the condition flags and the x87
   status word; prepend both to the user-supplied clobber list.  */
45398 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
45400 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
45405 /* Implements target vector targetm.asm.encode_section_info. */
45407 static void ATTRIBUTE_UNUSED
45408 ix86_encode_section_info (tree decl, rtx rtl, int first)
45410 default_encode_section_info (decl, rtl, first);
/* Symbols placed in the large data section need far (64-bit)
   addressing; record that in the SYMBOL_REF flags.  */
45412 if (ix86_in_large_data_p (decl))
45413 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
45416 /* Worker function for REVERSE_CONDITION. */
/* FP compares (CCFPmode/CCFPUmode) must reverse with unordered
   semantics preserved; integer compares use the plain reversal.  */
45419 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
45421 return (mode != CCFPmode && mode != CCFPUmode
45422 ? reverse_condition (code)
45423 : reverse_condition_maybe_unordered (code));
45426 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* Returns the assembler template; chooses popping (fstp/ffreep)
   forms when the source register dies in this insn.  */
45430 output_387_reg_move (rtx insn, rtx *operands)
45432 if (REG_P (operands[0]))
45434 if (REG_P (operands[1])
45435 && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))
45437 if (REGNO (operands[0]) == FIRST_STACK_REG)
45438 return output_387_ffreep (operands, 0);
45439 return "fstp\t%y0";
45441 if (STACK_TOP_P (operands[0]))
45442 return "fld%Z1\t%y1";
45445 else if (MEM_P (operands[0]))
45447 gcc_assert (REG_P (operands[1]));
45448 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45449 return "fstp%Z0\t%y0";
45452 /* There is no non-popping store to memory for XFmode.
45453 So if we need one, follow the store with a load. */
45454 if (GET_MODE (operands[0]) == XFmode)
45455 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45457 return "fst%Z0\t%y0";
45464 /* Output code to perform a conditional jump to LABEL, if C2 flag in
45465 FP status register is set. */
45468 ix86_emit_fp_unordered_jump (rtx label)
45470 rtx reg = gen_reg_rtx (HImode);
/* Fetch the x87 status word into REG.  */
45473 emit_insn (gen_x86_fnstsw_1 (reg));
/* With SAHF available (and profitable), move it into EFLAGS and
   branch on the unordered condition.  */
45475 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
45477 emit_insn (gen_x86_sahf_1 (reg));
45479 temp = gen_rtx_REG (CCmode, FLAGS_REG);
45480 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
/* Otherwise test the C2 bit (0x04) of the status word directly.  */
45484 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
45486 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
45487 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
45490 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
45491 gen_rtx_LABEL_REF (VOIDmode, label),
45493 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
45495 emit_jump_insn (temp);
/* Unordered operands are rare; predict the branch not taken.  */
45496 predict_jump (REG_BR_PROB_BASE * 10 / 100);
45499 /* Output code to perform a log1p XFmode calculation. */
/* Emits log1p(op1) into op0 using fyl2xp1 when |op1| is small enough
   for its accuracy range (|x| < 1 - sqrt(2)/2), else fyl2x on 1+x.  */
45501 void ix86_emit_i387_log1p (rtx op0, rtx op1)
45503 rtx_code_label *label1 = gen_label_rtx ();
45504 rtx_code_label *label2 = gen_label_rtx ();
45506 rtx tmp = gen_reg_rtx (XFmode);
45507 rtx tmp2 = gen_reg_rtx (XFmode);
45510 emit_insn (gen_absxf2 (tmp, op1));
/* Threshold 0.2928... == 1 - sqrt(2)/2, fyl2xp1's valid range.  */
45511 test = gen_rtx_GE (VOIDmode, tmp,
45512 CONST_DOUBLE_FROM_REAL_VALUE (
45513 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
45515 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
/* Small |x|: op0 = ln2 * log2(1 + op1) via fyl2xp1.  */
45517 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45518 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
45519 emit_jump (label2);
/* Large |x|: compute 1 + op1 explicitly, then use plain fyl2x.  */
45521 emit_label (label1);
45522 emit_move_insn (tmp, CONST1_RTX (XFmode));
45523 emit_insn (gen_addxf3 (tmp, op1, tmp));
45524 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45525 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
45527 emit_label (label2);
45530 /* Emit code for round calculation. */
/* Expands round(op1) into op0 as sgn(a) * floor(|a| + 0.5), using
   x87 fxam for the sign, frndint/lfloor for the floor, and a
   conditional negate at the end.  INMODE is FP; OUTMODE may be FP
   or integral (HI/SI/DI).  */
45531 void ix86_emit_i387_round (rtx op0, rtx op1)
45533 machine_mode inmode = GET_MODE (op1);
45534 machine_mode outmode = GET_MODE (op0);
45535 rtx e1, e2, res, tmp, tmp1, half;
45536 rtx scratch = gen_reg_rtx (HImode);
45537 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
45538 rtx_code_label *jump_label = gen_label_rtx ();
45540 rtx (*gen_abs) (rtx, rtx);
45541 rtx (*gen_neg) (rtx, rtx);
/* Pick the abs generator by the FP input mode.  */
45546 gen_abs = gen_abssf2;
45549 gen_abs = gen_absdf2;
45552 gen_abs = gen_absxf2;
45555 gcc_unreachable ();
/* Pick the negate generator by the (FP or integer) output mode.  */
45561 gen_neg = gen_negsf2;
45564 gen_neg = gen_negdf2;
45567 gen_neg = gen_negxf2;
45570 gen_neg = gen_neghi2;
45573 gen_neg = gen_negsi2;
45576 gen_neg = gen_negdi2;
45579 gcc_unreachable ();
45582 e1 = gen_reg_rtx (inmode);
45583 e2 = gen_reg_rtx (inmode);
45584 res = gen_reg_rtx (outmode);
45586 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
45588 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
45590 /* scratch = fxam(op1) */
45591 emit_insn (gen_rtx_SET (VOIDmode, scratch,
45592 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
45594 /* e1 = fabs(op1) */
45595 emit_insn (gen_abs (e1, op1));
45597 /* e2 = e1 + 0.5 */
45598 half = force_reg (inmode, half);
45599 emit_insn (gen_rtx_SET (VOIDmode, e2,
45600 gen_rtx_PLUS (inmode, e1, half)));
45602 /* res = floor(e2) */
/* Widen SF/DF to XFmode first so the x87 floor patterns apply.  */
45603 if (inmode != XFmode)
45605 tmp1 = gen_reg_rtx (XFmode);
45607 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
45608 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
/* FP output narrower than XF: round in XF, then truncate via a
   no-op truncation unspec.  */
45618 rtx tmp0 = gen_reg_rtx (XFmode);
45620 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
45622 emit_insn (gen_rtx_SET (VOIDmode, res,
45623 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
45624 UNSPEC_TRUNC_NOOP)));
45628 emit_insn (gen_frndintxf2_floor (res, tmp1));
/* Integral outputs use the lfloor patterns directly.  */
45631 emit_insn (gen_lfloorxfhi2 (res, tmp1));
45634 emit_insn (gen_lfloorxfsi2 (res, tmp1));
45637 emit_insn (gen_lfloorxfdi2 (res, tmp1));
45640 gcc_unreachable ();
45643 /* flags = signbit(a) */
/* Bit 0x02 of the fxam result is the sign (C1) bit.  */
45644 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
45646 /* if (flags) then res = -res */
45647 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
45648 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
45649 gen_rtx_LABEL_REF (VOIDmode, jump_label),
45651 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
45652 predict_jump (REG_BR_PROB_BASE * 50 / 100);
45653 JUMP_LABEL (insn) = jump_label;
45655 emit_insn (gen_neg (res, res));
45657 emit_label (jump_label);
45658 LABEL_NUSES (jump_label) = 1;
45660 emit_move_insn (op0, res);
45663 /* Output code to perform a Newton-Rhapson approximation of a single precision
45664 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
/* Emit RTL computing RES = A / B in MODE (scalar or vector float) via a
   hardware reciprocal estimate refined by one Newton-Raphson step:
   a/b ~= a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp(b))).  */
45666 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
45668 rtx x0, x1, e0, e1;
45670 x0 = gen_reg_rtx (mode);
45671 e0 = gen_reg_rtx (mode);
45672 e1 = gen_reg_rtx (mode);
45673 x1 = gen_reg_rtx (mode);
45675 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
45677 b = force_reg (mode, b);
45679 /* x0 = rcp(b) estimate */
/* 512-bit vector modes take the AVX-512 (rcp14) unspec branch; the
   else branch (unspec code elided here) handles the narrower modes.  */
45680 if (mode == V16SFmode || mode == V8DFmode)
45681 emit_insn (gen_rtx_SET (VOIDmode, x0,
45682 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45685 emit_insn (gen_rtx_SET (VOIDmode, x0,
45686 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = b * rcp(b) */
45690 emit_insn (gen_rtx_SET (VOIDmode, e0,
45691 gen_rtx_MULT (mode, x0, b)));
/* e0 = b * rcp(b) * rcp(b) */
45694 emit_insn (gen_rtx_SET (VOIDmode, e0,
45695 gen_rtx_MULT (mode, x0, e0)));
/* e1 = rcp(b) + rcp(b) */
45698 emit_insn (gen_rtx_SET (VOIDmode, e1,
45699 gen_rtx_PLUS (mode, x0, x0)));
/* x1 = e1 - e0: the refined reciprocal of b.  */
45702 emit_insn (gen_rtx_SET (VOIDmode, x1,
45703 gen_rtx_MINUS (mode, e1, e0)));
/* res = a * x1 */
45706 emit_insn (gen_rtx_SET (VOIDmode, res,
45707 gen_rtx_MULT (mode, a, x1)));
45710 /* Output code to perform a Newton-Rhapson approximation of a
45711 single precision floating point [reciprocal] square root. */
/* Emit RTL approximating sqrt(A) or rsqrt(A) into RES for scalar or
   vector MODE, starting from the hardware rsqrt estimate and applying
   one Newton-Raphson refinement:
     sqrt(a)  = -0.5 * a * rsqrt(a) * (a * rsqrt(a) * rsqrt(a) - 3.0)
     rsqrt(a) = -0.5 *     rsqrt(a) * (a * rsqrt(a) * rsqrt(a) - 3.0)
   NOTE(review): a trailing parameter (presumably a sqrt-vs-rsqrt flag
   steering the e3 computation below) is elided from this excerpt.  */
45713 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
45716 rtx x0, e0, e1, e2, e3, mthree, mhalf;
45720 x0 = gen_reg_rtx (mode);
45721 e0 = gen_reg_rtx (mode);
45722 e1 = gen_reg_rtx (mode);
45723 e2 = gen_reg_rtx (mode);
45724 e3 = gen_reg_rtx (mode);
/* Build the -3.0 and -0.5 constants used by the refinement step.  */
45726 real_from_integer (&r, VOIDmode, -3, SIGNED);
45727 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45729 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
45730 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45731 unspec = UNSPEC_RSQRT;
45733 if (VECTOR_MODE_P (mode))
45735 mthree = ix86_build_const_vector (mode, true, mthree);
45736 mhalf = ix86_build_const_vector (mode, true, mhalf);
45737 /* There is no 512-bit rsqrt. There is however rsqrt14. */
45738 if (GET_MODE_SIZE (mode) == 64)
45739 unspec = UNSPEC_RSQRT14;
45742 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
45743 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
45745 a = force_reg (mode, a);
45747 /* x0 = rsqrt(a) estimate */
45748 emit_insn (gen_rtx_SET (VOIDmode, x0,
45749 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45752 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
45757 zero = gen_reg_rtx (mode);
45758 mask = gen_reg_rtx (mode);
45760 zero = force_reg (mode, CONST0_RTX(mode));
45762 /* Handle masked compare. */
45763 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
/* AVX-512: compare produces a HImode k-mask, then blend zero into
   the lanes where a == 0.  */
45765 mask = gen_reg_rtx (HImode);
45766 /* Imm value 0x4 corresponds to not-equal comparison. */
45767 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
45768 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
/* Non-AVX-512 path: build an all-ones/all-zeros NE mask and AND it
   into the estimate to zero out the a == 0 lanes.  */
45772 emit_insn (gen_rtx_SET (VOIDmode, mask,
45773 gen_rtx_NE (mode, zero, a)));
45775 emit_insn (gen_rtx_SET (VOIDmode, x0,
45776 gen_rtx_AND (mode, x0, mask)));
/* e0 = a * x0 */
45781 emit_insn (gen_rtx_SET (VOIDmode, e0,
45782 gen_rtx_MULT (mode, x0, a)));
/* e1 = a * x0 * x0 */
45784 emit_insn (gen_rtx_SET (VOIDmode, e1,
45785 gen_rtx_MULT (mode, e0, x0)));
/* e2 = e1 - 3.0 (adding the -3.0 constant)  */
45788 mthree = force_reg (mode, mthree);
45789 emit_insn (gen_rtx_SET (VOIDmode, e2,
45790 gen_rtx_PLUS (mode, e1, mthree)));
45792 mhalf = force_reg (mode, mhalf);
45794 /* e3 = -.5 * x0 */
45795 emit_insn (gen_rtx_SET (VOIDmode, e3,
45796 gen_rtx_MULT (mode, x0, mhalf)));
45798 /* e3 = -.5 * e0 */
45799 emit_insn (gen_rtx_SET (VOIDmode, e3,
45800 gen_rtx_MULT (mode, e0, mhalf)));
45801 /* ret = e2 * e3 */
45802 emit_insn (gen_rtx_SET (VOIDmode, res,
45803 gen_rtx_MULT (mode, e2, e3)));
45806 #ifdef TARGET_SOLARIS
45807 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
/* Emit a .section directive for NAME with FLAGS.  ".eh_frame" needs the
   explicit "@unwind" type marker for old Binutils; otherwise defer to
   the comdat-aware or default ELF section-output routines.  */
45810 i386_solaris_elf_named_section (const char *name, unsigned int flags,
45813 /* With Binutils 2.15, the "@unwind" marker must be specified on
45814 every occurrence of the ".eh_frame" section, not just the first
45817 && strcmp (name, ".eh_frame") == 0)
45819 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
45820 flags & SECTION_WRITE ? "aw" : "a");
/* Linkonce sections go through the Solaris comdat helper when the
   assembler supports comdat groups.  */
45825 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
45827 solaris_elf_asm_comdat_section (name, flags, decl);
45832 default_elf_asm_named_section (name, flags, decl);
45834 #endif /* TARGET_SOLARIS */
45836 /* Return the mangling of TYPE if it is an extended fundamental type. */
45838 static const char *
45839 ix86_mangle_type (const_tree type)
/* Only void/bool/integer/real main variants can be extended fundamental
   types; anything else gets no special mangling (elided return).  */
45841 type = TYPE_MAIN_VARIANT (type);
45843 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
45844 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
45847 switch (TYPE_MODE (type))
45850 /* __float128 is "g". */
45853 /* "long double" or __float80 is "e". */
45860 /* For 32-bit code we can save PIC register setup by using
45861 __stack_chk_fail_local hidden function instead of calling
45862 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
45863 register, so it is better to call __stack_chk_fail directly. */
45865 static tree ATTRIBUTE_UNUSED
45866 ix86_stack_protect_fail (void)
/* Return the decl of the stack-protector failure callee: the plain
   external symbol for 64-bit, the hidden local alias for 32-bit.  */
45868 return TARGET_64BIT
45869 ? default_external_stack_protect_fail ()
45870 : default_hidden_stack_protect_fail ();
45873 /* Select a format to encode pointers in exception handling data. CODE
45874 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
45875 true if the symbol may be affected by dynamic relocations.
45877 ??? All x86 object file formats are capable of representing this.
45878 After all, the relocation needed is the same as for the call insn.
45879 Whether or not a particular assembler allows us to enter such, I
45880 guess we'll have to see. */
45882 asm_preferred_eh_data_format (int code, int global)
/* PIC (visible branch elided): pc-relative encoding, 8-byte data by
   default, narrowed to 4 bytes for the small/medium code models;
   globals additionally go through an indirect slot.  */
45886 int type = DW_EH_PE_sdata8;
45888 || ix86_cmodel == CM_SMALL_PIC
45889 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
45890 type = DW_EH_PE_sdata4;
45891 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
/* Non-PIC: absolute pointers, except small/medium models where 4-byte
   unsigned data suffices.  */
45893 if (ix86_cmodel == CM_SMALL
45894 || (ix86_cmodel == CM_MEDIUM && code))
45895 return DW_EH_PE_udata4;
45896 return DW_EH_PE_absptr;
45899 /* Expand copysign from SIGN to the positive value ABS_VALUE
45900 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
45903 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
/* RESULT = ABS_VALUE | (SIGN & signbit-mask).  A NULL MASK is built
   here from the mode's signbit constant.  */
45905 machine_mode mode = GET_MODE (sign);
45906 rtx sgn = gen_reg_rtx (mode);
45907 if (mask == NULL_RTX)
45909 machine_mode vmode;
45911 if (mode == SFmode)
45913 else if (mode == DFmode)
45918 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
45919 if (!VECTOR_MODE_P (mode))
45921 /* We need to generate a scalar mode mask in this case. */
45922 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45923 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45924 mask = gen_reg_rtx (mode);
45925 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* A caller-supplied MASK masks out the sign bit, so invert it before
   extracting the sign from SIGN.  */
45929 mask = gen_rtx_NOT (mode, mask);
45930 emit_insn (gen_rtx_SET (VOIDmode, sgn,
45931 gen_rtx_AND (mode, mask, sign)));
45932 emit_insn (gen_rtx_SET (VOIDmode, result,
45933 gen_rtx_IOR (mode, abs_value, sgn)));
45936 /* Expand fabs (OP0) and return a new rtx that holds the result. The
45937 mask for masking out the sign-bit is stored in *SMASK, if that is
45940 ix86_expand_sse_fabs (rtx op0, rtx *smask)
/* Compute xa = OP0 & ~signbit via an AND with an inverted signbit
   constant; the scalar case extracts element 0 of the vector mask.  */
45942 machine_mode vmode, mode = GET_MODE (op0);
45945 xa = gen_reg_rtx (mode);
45946 if (mode == SFmode)
45948 else if (mode == DFmode)
45952 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
45953 if (!VECTOR_MODE_P (mode))
45955 /* We need to generate a scalar mode mask in this case. */
45956 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45957 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45958 mask = gen_reg_rtx (mode);
45959 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
45961 emit_insn (gen_rtx_SET (VOIDmode, xa,
45962 gen_rtx_AND (mode, op0, mask)));
45970 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
45971 swapping the operands if SWAP_OPERANDS is true. The expanded
45972 code is a forward jump to a newly created label in case the
45973 comparison is true. The generated label rtx is returned. */
45974 static rtx_code_label *
45975 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
45976 bool swap_operands)
45978 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
45979 rtx_code_label *label;
45983 std::swap (op0, op1);
45985 label = gen_label_rtx ();
/* Emit the FP compare into the flags register, then a conditional
   jump on CODE to the new label.  */
45986 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
45987 emit_insn (gen_rtx_SET (VOIDmode, tmp,
45988 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
45989 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
45990 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
45991 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
45992 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
45993 JUMP_LABEL (tmp) = label;
45998 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
45999 using comparison code CODE. Operands are swapped for the comparison if
46000 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
46002 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
46003 bool swap_operands)
/* Uses the setcc_{sf,df}_sse patterns, which produce an all-ones /
   all-zeros mask in a register of MODE.  */
46005 rtx (*insn)(rtx, rtx, rtx, rtx);
46006 machine_mode mode = GET_MODE (op0);
46007 rtx mask = gen_reg_rtx (mode);
46010 std::swap (op0, op1);
46012 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
46014 emit_insn (insn (mask, op0, op1,
46015 gen_rtx_fmt_ee (code, mode, op0, op1)));
46019 /* Generate and return a rtx of mode MODE for 2**n where n is the number
46020 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
46022 ix86_gen_TWO52 (machine_mode mode)
/* 2**52 for DFmode, 2**23 for SFmode: the smallest magnitude at which
   all representable values are already integral.  */
46024 REAL_VALUE_TYPE TWO52r;
46027 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
46028 TWO52 = const_double_from_real_value (TWO52r, mode);
46029 TWO52 = force_reg (mode, TWO52);
46034 /* Expand SSE sequence for computing lround from OP1 storing
46037 ix86_expand_lround (rtx op0, rtx op1)
46039 /* C code for the stuff we're doing below:
46040 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
46043 machine_mode mode = GET_MODE (op1);
46044 const struct real_format *fmt;
46045 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46048 /* load nextafter (0.5, 0.0) */
/* nextafter (0.5, 0.0) = 0.5 - 2**(-p-1), so adding it never rounds a
   half-way value upward prematurely.  */
46049 fmt = REAL_MODE_FORMAT (mode);
46050 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46051 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46053 /* adj = copysign (0.5, op1) */
46054 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
46055 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
46057 /* adj = op1 + adj */
46058 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
46060 /* op0 = (imode)adj */
46061 expand_fix (op0, adj, 0);
46064 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
/* Compute lfloor (DO_FLOOR) or lceil (!DO_FLOOR) of OP1 into the
   integer register OP0: convert, re-float, and adjust by one when the
   truncating conversion landed on the wrong side.  */
46067 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
46069 /* C code for the stuff we're doing below (for do_floor):
46071 xi -= (double)xi > op1 ? 1 : 0;
46074 machine_mode fmode = GET_MODE (op1);
46075 machine_mode imode = GET_MODE (op0);
46076 rtx ireg, freg, tmp;
46077 rtx_code_label *label;
46079 /* reg = (long)op1 */
46080 ireg = gen_reg_rtx (imode);
46081 expand_fix (ireg, op1, 0);
46083 /* freg = (double)reg */
46084 freg = gen_reg_rtx (fmode);
46085 expand_float (freg, ireg, 0);
46087 /* ireg = (freg > op1) ? ireg - 1 : ireg */
/* UNLE jumps (skipping the adjustment) when the compare is not GT;
   operands are swapped for the ceil direction.  */
46088 label = ix86_expand_sse_compare_and_jump (UNLE,
46089 freg, op1, !do_floor);
46090 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
46091 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
46092 emit_move_insn (ireg, tmp);
46094 emit_label (label);
46095 LABEL_NUSES (label) = 1;
46097 emit_move_insn (op0, ireg);
46100 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
46101 result in OPERAND0. */
46103 ix86_expand_rint (rtx operand0, rtx operand1)
46105 /* C code for the stuff we're doing below:
46106 xa = fabs (operand1);
46107 if (!isless (xa, 2**52))
46109 xa = xa + 2**52 - 2**52;
46110 return copysign (xa, operand1);
46112 machine_mode mode = GET_MODE (operand0);
46113 rtx res, xa, TWO52, mask;
46114 rtx_code_label *label;
46116 res = gen_reg_rtx (mode);
46117 emit_move_insn (res, operand1);
46119 /* xa = abs (operand1) */
46120 xa = ix86_expand_sse_fabs (res, &mask);
46122 /* if (!isless (xa, TWO52)) goto label; */
/* Values >= 2**52 (2**23 for SF) are already integral; skip rounding.  */
46123 TWO52 = ix86_gen_TWO52 (mode);
46124 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Adding and subtracting 2**52 forces rounding to integer in the
   current (round-to-nearest) mode.  */
46126 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46127 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46129 ix86_sse_copysign_to_positive (res, xa, res, mask);
46131 emit_label (label);
46132 LABEL_NUSES (label) = 1;
46134 emit_move_insn (operand0, res);
46137 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* 32-bit-safe variant: avoids DImode fix/float (no cvttsd2siq) by using
   the +-2**52 trick plus a +-1 compensation.  DO_FLOOR selects floor
   vs. ceil.  */
46140 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
46142 /* C code for the stuff we expand below.
46143 double xa = fabs (x), x2;
46144 if (!isless (xa, TWO52))
46146 xa = xa + TWO52 - TWO52;
46147 x2 = copysign (xa, x);
46156 machine_mode mode = GET_MODE (operand0);
46157 rtx xa, TWO52, tmp, one, res, mask;
46158 rtx_code_label *label;
46160 TWO52 = ix86_gen_TWO52 (mode);
46162 /* Temporary for holding the result, initialized to the input
46163 operand to ease control flow. */
46164 res = gen_reg_rtx (mode);
46165 emit_move_insn (res, operand1);
46167 /* xa = abs (operand1) */
46168 xa = ix86_expand_sse_fabs (res, &mask);
46170 /* if (!isless (xa, TWO52)) goto label; */
46171 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46173 /* xa = xa + TWO52 - TWO52; */
46174 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46175 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46177 /* xa = copysign (xa, operand1) */
46178 ix86_sse_copysign_to_positive (xa, xa, res, mask);
46180 /* generate 1.0 or -1.0 */
/* For ceil the compensation adds 1, expressed as subtracting -1.0 so
   the same MINUS below serves both directions.  */
46181 one = force_reg (mode,
46182 const_double_from_real_value (do_floor
46183 ? dconst1 : dconstm1, mode));
46185 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46186 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46187 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46188 gen_rtx_AND (mode, one, tmp)));
46189 /* We always need to subtract here to preserve signed zero. */
46190 tmp = expand_simple_binop (mode, MINUS,
46191 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46192 emit_move_insn (res, tmp);
46194 emit_label (label);
46195 LABEL_NUSES (label) = 1;
46197 emit_move_insn (operand0, res);
46200 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* Variant using fix/float through the natural-width integer mode
   (DImode for DF, SImode for SF).  DO_FLOOR selects floor vs. ceil.  */
46203 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
46205 /* C code for the stuff we expand below.
46206 double xa = fabs (x), x2;
46207 if (!isless (xa, TWO52))
46209 x2 = (double)(long)x;
46216 if (HONOR_SIGNED_ZEROS (mode))
46217 return copysign (x2, x);
46220 machine_mode mode = GET_MODE (operand0);
46221 rtx xa, xi, TWO52, tmp, one, res, mask;
46222 rtx_code_label *label;
46224 TWO52 = ix86_gen_TWO52 (mode);
46226 /* Temporary for holding the result, initialized to the input
46227 operand to ease control flow. */
46228 res = gen_reg_rtx (mode);
46229 emit_move_insn (res, operand1);
46231 /* xa = abs (operand1) */
46232 xa = ix86_expand_sse_fabs (res, &mask);
46234 /* if (!isless (xa, TWO52)) goto label; */
46235 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46237 /* xa = (double)(long)x */
46238 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46239 expand_fix (xi, res, 0);
46240 expand_float (xa, xi, 0);
46243 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46245 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
/* Truncation may have rounded the wrong way; move one unit toward the
   correct direction where the mask is set.  */
46246 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46247 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46248 gen_rtx_AND (mode, one, tmp)));
46249 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
46250 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46251 emit_move_insn (res, tmp);
46253 if (HONOR_SIGNED_ZEROS (mode))
46254 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46256 emit_label (label);
46257 LABEL_NUSES (label) = 1;
46259 emit_move_insn (operand0, res);
46262 /* Expand SSE sequence for computing round from OPERAND1 storing
46263 into OPERAND0. Sequence that works without relying on DImode truncation
46264 via cvttsd2siq that is only available on 64bit targets. */
46266 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46268 /* C code for the stuff we expand below.
46269 double xa = fabs (x), xa2, x2;
46270 if (!isless (xa, TWO52))
46272 Using the absolute value and copying back sign makes
46273 -0.0 -> -0.0 correct.
46274 xa2 = xa + TWO52 - TWO52;
46279 else if (dxa > 0.5)
46281 x2 = copysign (xa2, x);
46284 machine_mode mode = GET_MODE (operand0);
46285 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46286 rtx_code_label *label;
46288 TWO52 = ix86_gen_TWO52 (mode);
46290 /* Temporary for holding the result, initialized to the input
46291 operand to ease control flow. */
46292 res = gen_reg_rtx (mode);
46293 emit_move_insn (res, operand1);
46295 /* xa = abs (operand1) */
46296 xa = ix86_expand_sse_fabs (res, &mask);
46298 /* if (!isless (xa, TWO52)) goto label; */
46299 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46301 /* xa2 = xa + TWO52 - TWO52; */
46302 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46303 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
46305 /* dxa = xa2 - xa; */
/* dxa is the rounding error of the nearest-integer step; comparing it
   against +-0.5 corrects round-to-even back to round-half-away.  */
46306 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
46308 /* generate 0.5, 1.0 and -0.5 */
46309 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46310 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46311 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46315 tmp = gen_reg_rtx (mode);
46316 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46317 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46318 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46319 gen_rtx_AND (mode, one, tmp)));
46320 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46321 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46322 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46323 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46324 gen_rtx_AND (mode, one, tmp)));
46325 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46327 /* res = copysign (xa2, operand1) */
46328 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46330 emit_label (label);
46331 LABEL_NUSES (label) = 1;
46333 emit_move_insn (operand0, res);
46336 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46339 ix86_expand_trunc (rtx operand0, rtx operand1)
46341 /* C code for SSE variant we expand below.
46342 double xa = fabs (x), x2;
46343 if (!isless (xa, TWO52))
46345 x2 = (double)(long)x;
46346 if (HONOR_SIGNED_ZEROS (mode))
46347 return copysign (x2, x);
46350 machine_mode mode = GET_MODE (operand0);
46351 rtx xa, xi, TWO52, res, mask;
46352 rtx_code_label *label;
46354 TWO52 = ix86_gen_TWO52 (mode);
46356 /* Temporary for holding the result, initialized to the input
46357 operand to ease control flow. */
46358 res = gen_reg_rtx (mode);
46359 emit_move_insn (res, operand1);
46361 /* xa = abs (operand1) */
46362 xa = ix86_expand_sse_fabs (res, &mask);
46364 /* if (!isless (xa, TWO52)) goto label; */
46365 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46367 /* x = (double)(long)x */
/* A fix/float round-trip truncates toward zero, which is exactly
   trunc for values below 2**52.  */
46368 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46369 expand_fix (xi, res, 0);
46370 expand_float (res, xi, 0);
46372 if (HONOR_SIGNED_ZEROS (mode))
46373 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46375 emit_label (label);
46376 LABEL_NUSES (label) = 1;
46378 emit_move_insn (operand0, res);
46381 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* 32-bit-safe trunc: avoids DImode fix/float by rounding with the
   +-TWO52 trick and subtracting 1 when that rounded away from zero.  */
46384 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46386 machine_mode mode = GET_MODE (operand0);
46387 rtx xa, mask, TWO52, one, res, smask, tmp;
46388 rtx_code_label *label;
46390 /* C code for SSE variant we expand below.
46391 double xa = fabs (x), x2;
46392 if (!isless (xa, TWO52))
46394 xa2 = xa + TWO52 - TWO52;
46398 x2 = copysign (xa2, x);
46402 TWO52 = ix86_gen_TWO52 (mode);
46404 /* Temporary for holding the result, initialized to the input
46405 operand to ease control flow. */
46406 res = gen_reg_rtx (mode);
46407 emit_move_insn (res, operand1);
46409 /* xa = abs (operand1) */
46410 xa = ix86_expand_sse_fabs (res, &smask);
46412 /* if (!isless (xa, TWO52)) goto label; */
46413 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46415 /* res = xa + TWO52 - TWO52; */
46416 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46417 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46418 emit_move_insn (res, tmp);
46421 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46423 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
46424 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46425 emit_insn (gen_rtx_SET (VOIDmode, mask,
46426 gen_rtx_AND (mode, mask, one)));
46427 tmp = expand_simple_binop (mode, MINUS,
46428 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46429 emit_move_insn (res, tmp);
46431 /* res = copysign (res, operand1) */
46432 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46434 emit_label (label);
46435 LABEL_NUSES (label) = 1;
46437 emit_move_insn (operand0, res);
46440 /* Expand SSE sequence for computing round from OPERAND1 storing
46443 ix86_expand_round (rtx operand0, rtx operand1)
46445 /* C code for the stuff we're doing below:
46446 double xa = fabs (x);
46447 if (!isless (xa, TWO52))
46449 xa = (double)(long)(xa + nextafter (0.5, 0.0));
46450 return copysign (xa, x);
46452 machine_mode mode = GET_MODE (operand0);
46453 rtx res, TWO52, xa, xi, half, mask;
46454 rtx_code_label *label;
46455 const struct real_format *fmt;
46456 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46458 /* Temporary for holding the result, initialized to the input
46459 operand to ease control flow. */
46460 res = gen_reg_rtx (mode);
46461 emit_move_insn (res, operand1);
46463 TWO52 = ix86_gen_TWO52 (mode);
46464 xa = ix86_expand_sse_fabs (res, &mask);
46465 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46467 /* load nextafter (0.5, 0.0) */
/* 0.5 - 2**(-p-1): the largest value below 0.5, so exact .5 inputs
   round away from zero rather than being bumped past the next int.  */
46468 fmt = REAL_MODE_FORMAT (mode);
46469 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46470 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46472 /* xa = xa + 0.5 */
46473 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46474 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46476 /* xa = (double)(int64_t)xa */
46477 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46478 expand_fix (xi, xa, 0);
46479 expand_float (xa, xi, 0);
46481 /* res = copysign (xa, operand1) */
46482 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46484 emit_label (label);
46485 LABEL_NUSES (label) = 1;
46487 emit_move_insn (operand0, res);
46490 /* Expand SSE sequence for computing round
46491 from OP1 storing into OP0 using sse4 round insn. */
46493 ix86_expand_round_sse4 (rtx op0, rtx op1)
46495 machine_mode mode = GET_MODE (op0);
46496 rtx e1, e2, res, half;
46497 const struct real_format *fmt;
46498 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46499 rtx (*gen_copysign) (rtx, rtx, rtx);
46500 rtx (*gen_round) (rtx, rtx, rtx);
/* Select the SF/DF copysign and SSE4.1 round expanders by mode.  */
46505 gen_copysign = gen_copysignsf3;
46506 gen_round = gen_sse4_1_roundsf2;
46509 gen_copysign = gen_copysigndf3;
46510 gen_round = gen_sse4_1_rounddf2;
46513 gcc_unreachable ();
46516 /* round (a) = trunc (a + copysign (0.5, a)) */
46518 /* load nextafter (0.5, 0.0) */
/* Use the predecessor of 0.5 so exact half-way values round away
   from zero, matching C round() semantics.  */
46519 fmt = REAL_MODE_FORMAT (mode);
46520 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46521 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46522 half = const_double_from_real_value (pred_half, mode);
46524 /* e1 = copysign (0.5, op1) */
46525 e1 = gen_reg_rtx (mode);
46526 emit_insn (gen_copysign (e1, half, op1));
46528 /* e2 = op1 + e1 */
46529 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46531 /* res = trunc (e2) */
46532 res = gen_reg_rtx (mode);
46533 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
46535 emit_move_insn (op0, res);
46539 /* Table of valid machine attributes. */
/* Registered via TARGET_ATTRIBUTE_TABLE; each entry pairs an attribute
   name with its argument-count bounds, applicability flags, and handler.
   The table is NULL-terminated.  */
46540 static const struct attribute_spec ix86_attribute_table[] =
46542 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
46543 affects_type_identity } */
46544 /* Stdcall attribute says callee is responsible for popping arguments
46545 if they are not variable. */
46546 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46548 /* Fastcall attribute says callee is responsible for popping arguments
46549 if they are not variable. */
46550 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46552 /* Thiscall attribute says callee is responsible for popping arguments
46553 if they are not variable. */
46554 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46556 /* Cdecl attribute says the callee is a normal C declaration */
46557 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46559 /* Regparm attribute specifies how many integer arguments are to be
46560 passed in registers. */
46561 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
46563 /* Sseregparm attribute says we are using x86_64 calling conventions
46564 for FP arguments. */
46565 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46567 /* The transactional memory builtins are implicitly regparm or fastcall
46568 depending on the ABI. Override the generic do-nothing attribute that
46569 these builtins were declared with. */
46570 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
46572 /* force_align_arg_pointer says this function realigns the stack at entry. */
46573 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
46574 false, true, true, ix86_handle_cconv_attribute, false },
46575 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46576 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
46577 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
46578 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
46581 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46583 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46585 #ifdef SUBTARGET_ATTRIBUTE_TABLE
46586 SUBTARGET_ATTRIBUTE_TABLE,
46588 /* ms_abi and sysv_abi calling convention function attributes. */
46589 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46590 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46591 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
46593 { "callee_pop_aggregate_return", 1, 1, false, true, true,
46594 ix86_handle_callee_pop_aggregate_return, true },
/* End-of-table sentinel.  */
46596 { NULL, 0, 0, false, false, false, NULL, false }
46599 /* Implement targetm.vectorize.builtin_vectorization_cost. */
/* Map the vectorizer's abstract cost kinds onto the per-CPU cost
   table in ix86_cost; vec_construct is estimated from the vector's
   element count.  */
46601 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
46606 switch (type_of_cost)
46609 return ix86_cost->scalar_stmt_cost;
46612 return ix86_cost->scalar_load_cost;
46615 return ix86_cost->scalar_store_cost;
46618 return ix86_cost->vec_stmt_cost;
46621 return ix86_cost->vec_align_load_cost;
46624 return ix86_cost->vec_store_cost;
46626 case vec_to_scalar:
46627 return ix86_cost->vec_to_scalar_cost;
46629 case scalar_to_vec:
46630 return ix86_cost->scalar_to_vec_cost;
46632 case unaligned_load:
46633 case unaligned_store:
46634 return ix86_cost->vec_unalign_load_cost;
46636 case cond_branch_taken:
46637 return ix86_cost->cond_taken_branch_cost;
46639 case cond_branch_not_taken:
46640 return ix86_cost->cond_not_taken_branch_cost;
46643 case vec_promote_demote:
46644 return ix86_cost->vec_stmt_cost;
46646 case vec_construct:
/* Roughly one insn per element pair plus one to combine.  */
46647 elements = TYPE_VECTOR_SUBPARTS (vectype);
46648 return elements / 2 + 1;
46651 gcc_unreachable ();
46655 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
46656 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
46657 insn every time. */
46659 static GTY(()) rtx_insn *vselect_insn;
46661 /* Initialize vselect_insn. */
/* Build the template with a MAX_VECT_LEN-entry PARALLEL of const0_rtx
   placeholders; expand_vselect later shrinks/patches it in place.
   The V2DF/V4DF modes here are placeholders too and are overwritten
   per use.  */
46664 init_vselect_insn (void)
46669 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
46670 for (i = 0; i < MAX_VECT_LEN; ++i)
46671 XVECEXP (x, 0, i) = const0_rtx;
46672 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
46674 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
46676 vselect_insn = emit_insn (x);
46680 /* Construct (set target (vec_select op0 (parallel perm))) and
46681 return true if that's a valid instruction in the active ISA. */
46684 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
46685 unsigned nelt, bool testing_p)
46688 rtx x, save_vconcat;
46691 if (vselect_insn == NULL_RTX)
46692 init_vselect_insn ();
/* Patch the cached template in place: shrink the PARALLEL to NELT
   entries and fill in the permutation indices.  */
46694 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
46695 PUT_NUM_ELEM (XVEC (x, 0), nelt);
46696 for (i = 0; i < nelt; ++i)
46697 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
46698 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46699 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
46700 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
46701 SET_DEST (PATTERN (vselect_insn)) = target;
/* Ask recog whether the patched pattern matches a real insn.  */
46702 icode = recog_memoized (vselect_insn);
46704 if (icode >= 0 && !testing_p)
46705 emit_insn (copy_rtx (PATTERN (vselect_insn)));
/* Restore the template so the cached insn stays reusable, and clear
   the memoized code since the pattern was mutated.  */
46707 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
46708 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
46709 INSN_CODE (vselect_insn) = -1;
46714 /* Similar, but generate a vec_concat from op0 and op1 as well. */
46717 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
46718 const unsigned char *perm, unsigned nelt,
46721 machine_mode v2mode;
46725 if (vselect_insn == NULL_RTX)
46726 init_vselect_insn ();
/* Reuse the cached template's VEC_CONCAT node: set its mode to the
   double-width vector mode and (in elided lines) its two operands,
   then delegate matching/emission to expand_vselect.  */
46728 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
46729 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46730 PUT_MODE (x, v2mode);
46733 ok = expand_vselect (target, x, perm, nelt, testing_p);
/* Restore the template's placeholder operands.  */
46734 XEXP (x, 0) = const0_rtx;
46735 XEXP (x, 1) = const0_rtx;
46739 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46740 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
46743 expand_vec_perm_blend (struct expand_vec_perm_d *d)
46745 machine_mode vmode = d->vmode;
46746 unsigned i, mask, nelt = d->nelt;
46747 rtx target, op0, op1, x;
46748 rtx rperm[32], vperm;
/* A blend picks element i from either op0 or op1; a one-operand
   permutation can never be expressed this way.  */
46750 if (d->one_operand_p)
/* ISA gates: which blend instruction families exist per vector size.  */
46752 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
46753 && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
46755 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
46757 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
46759 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
46764 /* This is a blend, not a permute. Elements must stay in their
46765 respective lanes. */
46766 for (i = 0; i < nelt; ++i)
46768 unsigned e = d->perm[i]
46769 if (!(e == i || e == i + nelt))
46776 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
46777 decision should be extracted elsewhere, so that we only try that
46778 sequence once all budget==3 options have been tried. */
46779 target = d->target;
/* Build the immediate: bit i set means take element i from op1.  */
46798 for (i = 0; i < nelt; ++i)
46799 mask |= (d->perm[i] >= nelt) << i;
/* V2DF-style case: 4 mask bits per element (element pair encoding).  */
46803 for (i = 0; i < 2; ++i)
46804 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
/* V4SF/V4SI-style case: 2 mask bits per element.  */
46809 for (i = 0; i < 4; ++i)
46810 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46815 /* See if bytes move in pairs so we can use pblendw with
46816 an immediate argument, rather than pblendvb with a vector
46818 for (i = 0; i < 16; i += 2)
46819 if (d->perm[i] + 1 != d->perm[i + 1])
/* pblendvb path: build a constant selector vector, 0 = op0, -1 = op1.  */
46822 for (i = 0; i < nelt; ++i)
46823 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
46826 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
46827 vperm = force_reg (vmode, vperm);
46829 if (GET_MODE_SIZE (vmode) == 16)
46830 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
46832 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
/* If we worked in a different mode, copy back via a lowpart move.  */
46833 if (target != d->target)
46834 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
/* pblendw: one mask bit per 16-bit element, taken from even bytes.  */
46838 for (i = 0; i < 8; ++i)
46839 mask |= (d->perm[i * 2] >= 16) << i;
46844 target = gen_reg_rtx (vmode);
46845 op0 = gen_lowpart (vmode, op0);
46846 op1 = gen_lowpart (vmode, op1);
46850 /* See if bytes move in pairs. If not, vpblendvb must be used. */
46851 for (i = 0; i < 32; i += 2)
46852 if (d->perm[i] + 1 != d->perm[i + 1])
46854 /* See if bytes move in quadruplets. If yes, vpblendd
46855 with immediate can be used. */
46856 for (i = 0; i < 32; i += 4)
46857 if (d->perm[i] + 2 != d->perm[i + 2])
46861 /* See if bytes move the same in both lanes. If yes,
46862 vpblendw with immediate can be used. */
46863 for (i = 0; i < 16; i += 2)
46864 if (d->perm[i] + 16 != d->perm[i + 16])
46867 /* Use vpblendw. */
46868 for (i = 0; i < 16; ++i)
46869 mask |= (d->perm[i * 2] >= 32) << i;
46874 /* Use vpblendd. */
46875 for (i = 0; i < 8; ++i)
46876 mask |= (d->perm[i * 4] >= 32) << i;
46881 /* See if words move in pairs. If yes, vpblendd can be used. */
46882 for (i = 0; i < 16; i += 2)
46883 if (d->perm[i] + 1 != d->perm[i + 1])
46887 /* See if words move the same in both lanes. If not,
46888 vpblendvb must be used. */
46889 for (i = 0; i < 8; i++)
46890 if (d->perm[i] + 8 != d->perm[i + 8])
46892 /* Use vpblendvb. */
46893 for (i = 0; i < 32; ++i)
46894 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
46898 target = gen_reg_rtx (vmode);
46899 op0 = gen_lowpart (vmode, op0);
46900 op1 = gen_lowpart (vmode, op1);
46901 goto finish_pblendvb;
46904 /* Use vpblendw. */
46905 for (i = 0; i < 16; ++i)
46906 mask |= (d->perm[i] >= 16) << i;
46910 /* Use vpblendd. */
46911 for (i = 0; i < 8; ++i)
46912 mask |= (d->perm[i * 2] >= 16) << i;
46917 /* Use vpblendd. */
46918 for (i = 0; i < 4; ++i)
46919 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46924 gcc_unreachable ();
46927 /* This matches five different patterns with the different modes. */
/* Immediate-blend epilogue: a VEC_MERGE with the computed mask.  */
46928 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
46929 x = gen_rtx_SET (VOIDmode, target, x);
46931 if (target != d->target)
46932 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46937 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46938 in terms of the variable form of vpermilps.
46940 Note that we will have already failed the immediate input vpermilps,
46941 which requires that the high and low part shuffle be identical; the
46942 variable form doesn't require that. */
46945 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
46947 rtx rperm[8], vperm;
/* Only handles the AVX one-operand V8SF case.  */
46950 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
46953 /* We can only permute within the 128-bit lane. */
46954 for (i = 0; i < 8; ++i)
46956 unsigned e = d->perm[i];
/* Element from the other 128-bit half => not expressible; bail.  */
46957 if (i < 4 ? e >= 4 : e < 4)
/* Build the variable control vector for vpermilps.  */
46964 for (i = 0; i < 8; ++i)
46966 unsigned e = d->perm[i];
46968 /* Within each 128-bit lane, the elements of op0 are numbered
46969 from 0 and the elements of op1 are numbered from 4. */
46975 rperm[i] = GEN_INT (e);
46978 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
46979 vperm = force_reg (V8SImode, vperm);
46980 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
46985 /* Return true if permutation D can be performed as VMODE permutation
46989 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
46991 unsigned int i, j, chunk;
/* Only integer vector modes of the same total size are candidates.  */
46993 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
46994 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
46995 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
/* VMODE's elements are at least as fine-grained as D's; nothing to
   check (the elided line presumably returns true here — confirm).  */
46998 if (GET_MODE_NUNITS (vmode) >= d->nelt)
/* Otherwise each VMODE element covers CHUNK elements of d->vmode;
   the permutation must move whole aligned chunks intact.  */
47001 chunk = d->nelt / GET_MODE_NUNITS (vmode);
47002 for (i = 0; i < d->nelt; i += chunk)
/* Chunk start must be chunk-aligned in the source ...  */
47003 if (d->perm[i] & (chunk - 1))
/* ... and the rest of the chunk must be consecutive after it.  */
47006 for (j = 1; j < chunk; ++j)
47007 if (d->perm[i] + j != d->perm[i + j])
47013 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47014 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
47017 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
47019 unsigned i, nelt, eltsz, mask;
47020 unsigned char perm[64];
47021 machine_mode vmode = V16QImode;
47022 rtx rperm[64], vperm, target, op0, op1;
/* Two-operand case: only XOP vpperm (16 bytes) or AVX2 vperm2i128.  */
47026 if (!d->one_operand_p)
47028 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
47031 && valid_perm_using_mode_p (V2TImode, d))
47036 /* Use vperm2i128 insn. The pattern uses
47037 V4DImode instead of V2TImode. */
47038 target = d->target;
47039 if (d->vmode != V4DImode)
47040 target = gen_reg_rtx (V4DImode);
47041 op0 = gen_lowpart (V4DImode, d->op0);
47042 op1 = gen_lowpart (V4DImode, d->op1);
/* Encode which 128-bit half goes where into the 8-bit immediate.  */
47044 = GEN_INT ((d->perm[0] / (nelt / 2))
47045 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
47046 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
47047 if (target != d->target)
47048 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
/* One-operand case, dispatched on total vector size.  */
47056 if (GET_MODE_SIZE (d->vmode) == 16)
47061 else if (GET_MODE_SIZE (d->vmode) == 32)
47066 /* V4DImode should be already handled through
47067 expand_vselect by vpermq instruction. */
47068 gcc_assert (d->vmode != V4DImode)
47071 if (d->vmode == V8SImode
47072 || d->vmode == V16HImode
47073 || d->vmode == V32QImode)
47075 /* First see if vpermq can be used for
47076 V8SImode/V16HImode/V32QImode. */
47077 if (valid_perm_using_mode_p (V4DImode, d))
/* Derive the 2-bit-per-element vpermq selector.  */
47079 for (i = 0; i < 4; i++)
47080 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
47083 target = gen_reg_rtx (V4DImode);
47084 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
47087 emit_move_insn (d->target,
47088 gen_lowpart (d->vmode, target));
47094 /* Next see if vpermd can be used. */
47095 if (valid_perm_using_mode_p (V8SImode, d))
47098 /* Or if vpermps can be used. */
47099 else if (d->vmode == V8SFmode)
47102 if (vmode == V32QImode)
47104 /* vpshufb only works intra lanes, it is not
47105 possible to shuffle bytes in between the lanes. */
47106 for (i = 0; i < nelt; ++i)
47107 if ((d->perm[i] ^ i) & (nelt / 2))
47111 else if (GET_MODE_SIZE (d->vmode) == 64)
47113 if (!TARGET_AVX512BW)
47116 /* If vpermq didn't work, vpshufb won't work either. */
47117 if (d->vmode == V8DFmode || d->vmode == V8DImode)
47121 if (d->vmode == V16SImode
47122 || d->vmode == V32HImode
47123 || d->vmode == V64QImode)
47125 /* First see if vpermq can be used for
47126 V16SImode/V32HImode/V64QImode. */
47127 if (valid_perm_using_mode_p (V8DImode, d))
47129 for (i = 0; i < 8; i++)
47130 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
47133 target = gen_reg_rtx (V8DImode);
47134 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
47137 emit_move_insn (d->target,
47138 gen_lowpart (d->vmode, target));
47144 /* Next see if vpermd can be used. */
47145 if (valid_perm_using_mode_p (V16SImode, d))
47148 /* Or if vpermps can be used. */
47149 else if (d->vmode == V16SFmode)
47151 if (vmode == V64QImode)
47153 /* vpshufb only works intra lanes, it is not
47154 possible to shuffle bytes in between the lanes. */
47155 for (i = 0; i < nelt; ++i)
47156 if ((d->perm[i] ^ i) & (nelt / 4))
/* vpermd/vpermps paths: build a dword-granular selector vector.  */
47167 if (vmode == V8SImode)
47168 for (i = 0; i < 8; ++i)
47169 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
47170 else if (vmode == V16SImode)
47171 for (i = 0; i < 16; ++i)
47172 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
/* Byte-shuffle paths: expand the permutation to byte granularity.
   MASK bounds the element index (two-operand forms address 2*nelt).  */
47175 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47176 if (!d->one_operand_p)
47177 mask = 2 * nelt - 1;
47178 else if (vmode == V16QImode)
47180 else if (vmode == V64QImode)
47181 mask = nelt / 4 - 1;
47183 mask = nelt / 2 - 1;
47185 for (i = 0; i < nelt; ++i)
47187 unsigned j, e = d->perm[i] & mask;
47188 for (j = 0; j < eltsz; ++j)
47189 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
47193 vperm = gen_rtx_CONST_VECTOR (vmode,
47194 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
47195 vperm = force_reg (vmode, vperm);
47197 target = d->target;
47198 if (d->vmode != vmode)
47199 target = gen_reg_rtx (vmode);
47200 op0 = gen_lowpart (vmode, d->op0);
/* Emit the instruction chosen above for the working mode.  */
47201 if (d->one_operand_p)
47203 if (vmode == V16QImode)
47204 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
47205 else if (vmode == V32QImode)
47206 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
47207 else if (vmode == V64QImode)
47208 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
47209 else if (vmode == V8SFmode)
47210 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
47211 else if (vmode == V8SImode)
47212 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
47213 else if (vmode == V16SFmode)
47214 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
47215 else if (vmode == V16SImode)
47216 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
47218 gcc_unreachable ();
/* Two-operand XOP path.  */
47222 op1 = gen_lowpart (vmode, d->op1);
47223 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
47225 if (target != d->target)
47226 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47231 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
47232 in a single instruction. */
47235 expand_vec_perm_1 (struct expand_vec_perm_d *d)
47237 unsigned i, nelt = d->nelt;
47238 unsigned char perm2[MAX_VECT_LEN];
47240 /* Check plain VEC_SELECT first, because AVX has instructions that could
47241 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
47242 input where SEL+CONCAT may not. */
47243 if (d->one_operand_p)
47245 int mask = nelt - 1;
47246 bool identity_perm = true;
47247 bool broadcast_perm = true;
/* Classify the permutation in one pass: identity and/or broadcast.  */
47249 for (i = 0; i < nelt; i++)
47251 perm2[i] = d->perm[i] & mask;
47253 identity_perm = false;
47255 broadcast_perm = false;
/* Identity: a plain move suffices.  */
47261 emit_move_insn (d->target, d->op0);
47264 else if (broadcast_perm && TARGET_AVX2)
47266 /* Use vpbroadcast{b,w,d}. */
47267 rtx (*gen) (rtx, rtx) = NULL;
/* Pick the broadcast expander for the mode / available ISA level.  */
47271 if (TARGET_AVX512BW)
47272 gen = gen_avx512bw_vec_dupv64qi_1;
47275 gen = gen_avx2_pbroadcastv32qi_1;
47278 if (TARGET_AVX512BW)
47279 gen = gen_avx512bw_vec_dupv32hi_1;
47282 gen = gen_avx2_pbroadcastv16hi_1;
47285 if (TARGET_AVX512F)
47286 gen = gen_avx512f_vec_dupv16si_1;
47289 gen = gen_avx2_pbroadcastv8si_1;
47292 gen = gen_avx2_pbroadcastv16qi;
47295 gen = gen_avx2_pbroadcastv8hi;
47298 if (TARGET_AVX512F)
47299 gen = gen_avx512f_vec_dupv16sf_1;
47302 gen = gen_avx2_vec_dupv8sf_1;
47305 if (TARGET_AVX512F)
47306 gen = gen_avx512f_vec_dupv8df_1;
47309 if (TARGET_AVX512F)
47310 gen = gen_avx512f_vec_dupv8di_1;
47312 /* For other modes prefer other shuffles this function creates. */
47318 emit_insn (gen (d->target, d->op0));
/* General one-operand vec_select.  */
47323 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47326 /* There are plenty of patterns in sse.md that are written for
47327 SEL+CONCAT and are not replicated for a single op. Perhaps
47328 that should be changed, to avoid the nastiness here. */
47330 /* Recognize interleave style patterns, which means incrementing
47331 every other permutation operand. */
47332 for (i = 0; i < nelt; i += 2)
47334 perm2[i] = d->perm[i] & mask;
47335 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47337 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47341 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
47344 for (i = 0; i < nelt; i += 4)
47346 perm2[i + 0] = d->perm[i + 0] & mask;
47347 perm2[i + 1] = d->perm[i + 1] & mask;
47348 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47349 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47352 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47358 /* Finally, try the fully general two operand permute. */
47359 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47363 /* Recognize interleave style patterns with reversed operands. */
47364 if (!d->one_operand_p)
47366 for (i = 0; i < nelt; ++i)
47368 unsigned e = d->perm[i];
47376 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
47381 /* Try the SSE4.1 blend variable merge instructions. */
47382 if (expand_vec_perm_blend (d))
47385 /* Try one of the AVX vpermil variable permutations. */
47386 if (expand_vec_perm_vpermil (d))
47389 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47390 vpshufb, vpermd, vpermps or vpermq variable permutation. */
47391 if (expand_vec_perm_pshufb (d))
47394 /* Try the AVX2 vpalignr instruction. */
47395 if (expand_vec_perm_palignr (d, true))
47398 /* Try the AVX512F vpermi2 instructions. */
47399 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47405 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47406 in terms of a pair of pshuflw + pshufhw instructions. */
47409 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47411 unsigned char perm2[MAX_VECT_LEN];
/* Only the one-operand V8HI case is handled here.  */
47415 if (d->vmode != V8HImode || !d->one_operand_p)
47418 /* The two permutations only operate in 64-bit lanes. */
47419 for (i = 0; i < 4; ++i)
47420 if (d->perm[i] >= 4)
47422 for (i = 4; i < 8; ++i)
47423 if (d->perm[i] < 4)
47429 /* Emit the pshuflw. */
/* Low half comes from d->perm; leave the high half as identity
   (filled by the elided loop body at 47432 — copies 4 bytes since
   perm elements are unsigned char).  */
47430 memcpy (perm2, d->perm, 4);
47431 for (i = 4; i < 8; ++i)
47433 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47436 /* Emit the pshufhw. */
/* Now the converse: identity low half, permuted high half, applied
   on top of the pshuflw result already in d->target.  */
47437 memcpy (perm2 + 4, d->perm + 4, 4);
47438 for (i = 0; i < 4; ++i)
47440 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47446 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47447 the permutation using the SSSE3 palignr instruction. This succeeds
47448 when all of the elements in PERM fit within one vector and we merely
47449 need to shift them down so that a single vector permutation has a
47450 chance to succeed. If SINGLE_INSN_ONLY_P, succeed if only
47451 the vpalignr instruction itself can perform the requested permutation. */
47454 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47456 unsigned i, nelt = d->nelt;
47457 unsigned min, max, minswap, maxswap;
47458 bool in_order, ok, swap = false;
47460 struct expand_vec_perm_d dcopy;
47462 /* Even with AVX, palignr only operates on 128-bit vectors,
47463 in AVX2 palignr operates on both 128-bit lanes. */
47464 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47465 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
/* Track the element-index range both as-is (min/max) and with the
   two operands swapped (minswap/maxswap).  */
47470 minswap = 2 * nelt;
47472 for (i = 0; i < nelt; ++i)
47474 unsigned e = d->perm[i];
47475 unsigned eswap = d->perm[i] ^ nelt;
47476 if (GET_MODE_SIZE (d->vmode) == 32)
/* 32-byte case: fold the per-lane index, operand bit in nelt/2.  */
47478 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47479 eswap = e ^ (nelt / 2);
47485 if (eswap < minswap)
47487 if (eswap > maxswap)
/* Range too wide => the elements do not fit into one vector.  */
47491 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt)
47493 if (d->one_operand_p
47495 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47496 ? nelt / 2 : nelt))
47503 /* Given that we have SSSE3, we know we'll be able to implement the
47504 single operand permutation after the palignr with pshufb for
47505 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
47507 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
/* Swapped form selected: exchange operands and flip the operand bit.  */
47513 dcopy.op0 = d->op1;
47514 dcopy.op1 = d->op0;
47515 for (i = 0; i < nelt; ++i)
47516 dcopy.perm[i] ^= nelt;
/* Rebase all indices by MIN and check whether they end up in order.  */
47520 for (i = 0; i < nelt; ++i)
47522 unsigned e = dcopy.perm[i];
47523 if (GET_MODE_SIZE (d->vmode) == 32
47525 && (e & (nelt / 2 - 1)) < min)
47526 e = e - min - (nelt / 2);
47533 dcopy.one_operand_p = true;
47535 if (single_insn_only_p && !in_order)
47538 /* For AVX2, test whether we can permute the result in one instruction. */
47543 dcopy.op1 = dcopy.op0;
47544 return expand_vec_perm_1 (&dcopy);
/* Emit the actual (v)palignr, shifting by MIN elements (in bits).  */
47547 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
47548 if (GET_MODE_SIZE (d->vmode) == 16)
47550 target = gen_reg_rtx (TImode);
47551 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47552 gen_lowpart (TImode, dcopy.op0), shift));
47556 target = gen_reg_rtx (V2TImode);
47557 emit_insn (gen_avx2_palignrv2ti (target,
47558 gen_lowpart (V2TImode, dcopy.op1),
47559 gen_lowpart (V2TImode, dcopy.op0),
47563 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47565 /* Test for the degenerate case where the alignment by itself
47566 produces the desired permutation. */
47569 emit_move_insn (d->target, dcopy.op0);
/* Otherwise finish with a one-operand permutation of the shifted
   vector; for 16-byte vectors this is guaranteed to succeed.  */
47573 ok = expand_vec_perm_1 (&dcopy);
47574 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
47579 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47580 the permutation using the SSE4_1 pblendv instruction. Potentially
47581 reduces permutation from 2 pshufb and or to 1 pshufb and pblendv. */
47584 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
47586 unsigned i, which, nelt = d->nelt;
47587 struct expand_vec_perm_d dcopy, dcopy1;
47588 machine_mode vmode = d->vmode;
47591 /* Use the same checks as in expand_vec_perm_blend. */
47592 if (d->one_operand_p)
47594 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47596 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47598 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47603 /* Figure out where permutation elements stay not in their
47604 respective lanes. */
/* WHICH accumulates bit 1 for misplaced op0 elements and bit 2 for
   misplaced op1 elements.  */
47605 for (i = 0, which = 0; i < nelt; ++i)
47607 unsigned e = d->perm[i];
47609 which |= (e < nelt ? 1 : 2);
47611 /* We can pblend the part where elements stay not in their
47612 respective lanes only when these elements are all in one
47613 half of a permutation.
47614 {0 1 8 3 4 5 9 7} is ok as 8, 9 are at not at their respective
47615 lanes, but both 8 and 9 >= 8
47616 {0 1 8 3 4 5 2 7} is not ok as 2 and 8 are not at their
47617 respective lanes and 8 >= 8, but 2 not. */
47618 if (which != 1 && which != 2)
47620 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
47623 /* First we apply one operand permutation to the part where
47624 elements stay not in their respective lanes. */
/* Choose the operand all misplaced elements come from.  */
47627 dcopy.op0 = dcopy.op1 = d->op1;
47629 dcopy.op0 = dcopy.op1 = d->op0;
47631 dcopy.target = gen_reg_rtx (vmode);
47632 dcopy.one_operand_p = true;
47634 for (i = 0; i < nelt; ++i)
47635 dcopy.perm[i] = d->perm[i] & (nelt - 1);
47637 ok = expand_vec_perm_1 (&dcopy);
47638 if (GET_MODE_SIZE (vmode) != 16 && !ok)
47645 /* Next we put permuted elements into their positions. */
/* The second stage is a lane-preserving blend of the permuted vector
   against the untouched operand.  */
47648 dcopy1.op1 = dcopy.target;
47650 dcopy1.op0 = dcopy.target;
47652 for (i = 0; i < nelt; ++i)
47653 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
47655 ok = expand_vec_perm_blend (&dcopy1);
47661 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
47663 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47664 a two vector permutation into a single vector permutation by using
47665 an interleave operation to merge the vectors. */
47668 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
47670 struct expand_vec_perm_d dremap, dfinal;
47671 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
47672 unsigned HOST_WIDE_INT contents;
47673 unsigned char remap[2 * MAX_VECT_LEN];
47675 bool ok, same_halves = false;
47677 if (GET_MODE_SIZE (d->vmode) == 16)
47679 if (d->one_operand_p)
47682 else if (GET_MODE_SIZE (d->vmode) == 32)
47686 /* For 32-byte modes allow even d->one_operand_p.
47687 The lack of cross-lane shuffling in some instructions
47688 might prevent a single insn shuffle. */
47690 dfinal.testing_p = true;
47691 /* If expand_vec_perm_interleave3 can expand this into
47692 a 3 insn sequence, give up and let it be expanded as
47693 3 insn sequence. While that is one insn longer,
47694 it doesn't need a memory operand and in the common
47695 case that both interleave low and high permutations
47696 with the same operands are adjacent needs 4 insns
47697 for both after CSE. */
47698 if (expand_vec_perm_interleave3 (&dfinal))
47704 /* Examine from whence the elements come. */
/* CONTENTS is a 2*nelt-wide bitmask of which source elements are used.  */
47706 for (i = 0; i < nelt; ++i)
47707 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
47709 memset (remap, 0xff, sizeof (remap));
47712 if (GET_MODE_SIZE (d->vmode) == 16)
47714 unsigned HOST_WIDE_INT h1, h2, h3, h4;
47716 /* Split the two input vectors into 4 halves. */
47717 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
47722 /* If the elements from the low halves use interleave low, and similarly
47723 for interleave high. If the elements are from mis-matched halves, we
47724 can use shufps for V4SF/V4SI or do a DImode shuffle. */
47725 if ((contents & (h1 | h3)) == contents)
/* interleave-low: remap op0-low to even, op1-low to odd slots.  */
47728 for (i = 0; i < nelt2; ++i)
47731 remap[i + nelt] = i * 2 + 1;
47732 dremap.perm[i * 2] = i;
47733 dremap.perm[i * 2 + 1] = i + nelt;
47735 if (!TARGET_SSE2 && d->vmode == V4SImode)
47736 dremap.vmode = V4SFmode;
47738 else if ((contents & (h2 | h4)) == contents)
/* interleave-high: same idea for the upper halves.  */
47741 for (i = 0; i < nelt2; ++i)
47743 remap[i + nelt2] = i * 2;
47744 remap[i + nelt + nelt2] = i * 2 + 1;
47745 dremap.perm[i * 2] = i + nelt2;
47746 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
47748 if (!TARGET_SSE2 && d->vmode == V4SImode)
47749 dremap.vmode = V4SFmode;
47751 else if ((contents & (h1 | h4)) == contents)
/* op0-low + op1-high: representable as a V2DI {0,3} shuffle.  */
47754 for (i = 0; i < nelt2; ++i)
47757 remap[i + nelt + nelt2] = i + nelt2;
47758 dremap.perm[i] = i;
47759 dremap.perm[i + nelt2] = i + nelt + nelt2;
47764 dremap.vmode = V2DImode;
47766 dremap.perm[0] = 0;
47767 dremap.perm[1] = 3;
47770 else if ((contents & (h2 | h3)) == contents)
/* op0-high + op1-low: a V2DI {1,2} shuffle.  */
47773 for (i = 0; i < nelt2; ++i)
47775 remap[i + nelt2] = i;
47776 remap[i + nelt] = i + nelt2;
47777 dremap.perm[i] = i + nelt2;
47778 dremap.perm[i + nelt2] = i + nelt;
47783 dremap.vmode = V2DImode;
47785 dremap.perm[0] = 1;
47786 dremap.perm[1] = 2;
/* 32-byte path: work at quarter (128-bit lane) granularity.  */
47794 unsigned int nelt4 = nelt / 4, nzcnt = 0;
47795 unsigned HOST_WIDE_INT q[8];
47796 unsigned int nonzero_halves[4];
47798 /* Split the two input vectors into 8 quarters. */
47799 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
47800 for (i = 1; i < 8; ++i)
47801 q[i] = q[0] << (nelt4 * i);
47802 for (i = 0; i < 4; ++i)
47803 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
47805 nonzero_halves[nzcnt] = i;
47811 gcc_assert (d->one_operand_p);
47812 nonzero_halves[1] = nonzero_halves[0];
47813 same_halves = true;
47815 else if (d->one_operand_p)
47817 gcc_assert (nonzero_halves[0] == 0);
47818 gcc_assert (nonzero_halves[1] == 1);
47823 if (d->perm[0] / nelt2 == nonzero_halves[1])
47825 /* Attempt to increase the likelihood that dfinal
47826 shuffle will be intra-lane. */
47827 char tmph = nonzero_halves[0];
47828 nonzero_halves[0] = nonzero_halves[1];
47829 nonzero_halves[1] = tmph;
47832 /* vperm2f128 or vperm2i128. */
47833 for (i = 0; i < nelt2; ++i)
47835 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
47836 remap[i + nonzero_halves[0] * nelt2] = i;
47837 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
47838 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
47841 if (d->vmode != V8SFmode
47842 && d->vmode != V4DFmode
47843 && d->vmode != V8SImode)
/* vperm2f128 lacks patterns for other modes; go via V8SI.  */
47845 dremap.vmode = V8SImode;
47847 for (i = 0; i < 4; ++i)
47849 dremap.perm[i] = i + nonzero_halves[0] * 4;
47850 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
47854 else if (d->one_operand_p)
47856 else if (TARGET_AVX2
47857 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
/* AVX2 interleave-low across both 128-bit lanes.  */
47860 for (i = 0; i < nelt4; ++i)
47863 remap[i + nelt] = i * 2 + 1;
47864 remap[i + nelt2] = i * 2 + nelt2;
47865 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
47866 dremap.perm[i * 2] = i;
47867 dremap.perm[i * 2 + 1] = i + nelt;
47868 dremap.perm[i * 2 + nelt2] = i + nelt2;
47869 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
47872 else if (TARGET_AVX2
47873 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
/* AVX2 interleave-high across both 128-bit lanes.  */
47876 for (i = 0; i < nelt4; ++i)
47878 remap[i + nelt4] = i * 2;
47879 remap[i + nelt + nelt4] = i * 2 + 1;
47880 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
47881 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
47882 dremap.perm[i * 2] = i + nelt4;
47883 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
47884 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
47885 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
47892 /* Use the remapping array set up above to move the elements from their
47893 swizzled locations into their final destinations. */
47895 for (i = 0; i < nelt; ++i)
47897 unsigned e = remap[d->perm[i]];
47898 gcc_assert (e < nelt);
47899 /* If same_halves is true, both halves of the remapped vector are the
47900 same. Avoid cross-lane accesses if possible. */
47901 if (same_halves && i >= nelt2)
47903 gcc_assert (e < nelt2);
47904 dfinal.perm[i] = e + nelt2;
47907 dfinal.perm[i] = e;
/* dfinal permutes the (single) result of the dremap interleave.  */
47911 dremap.target = gen_reg_rtx (dremap.vmode);
47912 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47914 dfinal.op1 = dfinal.op0;
47915 dfinal.one_operand_p = true;
47917 /* Test if the final remap can be done with a single insn. For V4SFmode or
47918 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
47920 ok = expand_vec_perm_1 (&dfinal);
47921 seq = get_insns ();
47930 if (dremap.vmode != dfinal.vmode)
47932 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
47933 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
47936 ok = expand_vec_perm_1 (&dremap);
47943 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47944 a single vector cross-lane permutation into vpermq followed
47945 by any of the single insn permutations. */
47948 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
47950 struct expand_vec_perm_d dremap, dfinal;
47951 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
47952 unsigned contents[2];
/* Only one-operand V32QI/V16HI (i.e. AVX2 byte/word vectors).  */
47956 && (d->vmode == V32QImode || d->vmode == V16HImode)
47957 && d->one_operand_p))
/* contents[k] records which source quarters feed destination half k.  */
47962 for (i = 0; i < nelt2; ++i)
47964 contents[0] |= 1u << (d->perm[i] / nelt4);
47965 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
/* Each destination half may draw from at most two source quarters,
   since vpermq gives each half only two qword slots.  */
47968 for (i = 0; i < 2; ++i)
47970 unsigned int cnt = 0;
47971 for (j = 0; j < 4; ++j)
47972 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
/* First insn: vpermq gathers the needed quarters.  */
47980 dremap.vmode = V4DImode;
47982 dremap.target = gen_reg_rtx (V4DImode);
47983 dremap.op0 = gen_lowpart (V4DImode, d->op0);
47984 dremap.op1 = dremap.op0;
47985 dremap.one_operand_p = true;
47986 for (i = 0; i < 2; ++i)
47988 unsigned int cnt = 0;
47989 for (j = 0; j < 4; ++j)
47990 if ((contents[i] & (1u << j)) != 0)
47991 dremap.perm[2 * i + cnt++] = j;
47992 for (; cnt < 2; ++cnt)
47993 dremap.perm[2 * i + cnt] = 0;
/* Second insn: an in-lane permutation of the gathered vector.  */
47997 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47998 dfinal.op1 = dfinal.op0;
47999 dfinal.one_operand_p = true;
48000 for (i = 0, j = 0; i < nelt; ++i)
/* Translate each original index into the post-vpermq layout.  */
48004 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
48005 if ((d->perm[i] / nelt4) == dremap.perm[j])
48007 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
48008 dfinal.perm[i] |= nelt4;
48010 gcc_unreachable ();
48013 ok = expand_vec_perm_1 (&dremap);
48016 ok = expand_vec_perm_1 (&dfinal);
48022 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
48023 a vector permutation using two instructions, vperm2f128 resp.
48024 vperm2i128 followed by any single in-lane permutation. */
48027 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
48029 struct expand_vec_perm_d dfirst, dsecond;
48030 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
/* 32-byte vectors only; integer modes additionally need AVX2.  */
48034 || GET_MODE_SIZE (d->vmode) != 32
48035 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
48039 dsecond.one_operand_p = false;
48040 dsecond.testing_p = true;
48042 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
48043 immediate. For perm < 16 the second permutation uses
48044 d->op0 as first operand, for perm >= 16 it uses d->op1
48045 as first operand. The second operand is the result of
/* Brute-force all 32 candidate vperm2[fi]128 configurations.  */
48047 for (perm = 0; perm < 32; perm++)
48049 /* Ignore permutations which do not move anything cross-lane. */
48052 /* The second shuffle for e.g. V4DFmode has
48053 0123 and ABCD operands.
48054 Ignore AB23, as 23 is already in the second lane
48055 of the first operand. */
48056 if ((perm & 0xc) == (1 << 2)) continue;
48057 /* And 01CD, as 01 is in the first lane of the first
48059 if ((perm & 3) == 0) continue;
48060 /* And 4567, as then the vperm2[fi]128 doesn't change
48061 anything on the original 4567 second operand. */
48062 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
48066 /* The second shuffle for e.g. V4DFmode has
48067 4567 and ABCD operands.
48068 Ignore AB67, as 67 is already in the second lane
48069 of the first operand. */
48070 if ((perm & 0xc) == (3 << 2)) continue;
48071 /* And 45CD, as 45 is in the first lane of the first
48073 if ((perm & 3) == 2) continue;
48074 /* And 0123, as then the vperm2[fi]128 doesn't change
48075 anything on the original 0123 first operand. */
48076 if ((perm & 0xf) == (1 << 2)) continue;
/* Derive the in-lane follow-up permutation for this candidate;
   give up on the candidate if an element is reachable neither from
   the vperm2f128 result nor from the untouched operand.  */
48079 for (i = 0; i < nelt; i++)
48081 j = d->perm[i] / nelt2;
48082 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
48083 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
48084 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
48085 dsecond.perm[i] = d->perm[i] & (nelt - 1);
/* Dry-run the follow-up shuffle before committing to anything.  */
48093 ok = expand_vec_perm_1 (&dsecond);
48104 /* Found a usable second shuffle. dfirst will be
48105 vperm2f128 on d->op0 and d->op1. */
48106 dsecond.testing_p = false;
48108 dfirst.target = gen_reg_rtx (d->vmode);
48109 for (i = 0; i < nelt; i++)
48110 dfirst.perm[i] = (i & (nelt2 - 1))
48111 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
48113 canonicalize_perm (&dfirst);
48114 ok = expand_vec_perm_1 (&dfirst);
48117 /* And dsecond is some single insn shuffle, taking
48118 d->op0 and result of vperm2f128 (if perm < 16) or
48119 d->op1 and result of vperm2f128 (otherwise). */
48121 dsecond.op0 = dsecond.op1;
48122 dsecond.op1 = dfirst.target;
48124 ok = expand_vec_perm_1 (&dsecond);
48130 /* For one operand, the only useful vperm2f128 permutation is 0x01
48132 if (d->one_operand_p)
48139 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48140 a two vector permutation using 2 intra-lane interleave insns
48141 and cross-lane shuffle for 32-byte vectors. */
48144 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
48147 rtx (*gen) (rtx, rtx, rtx);
48149 if (d->one_operand_p)
48151 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
48153 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
/* Only exact interleave patterns qualify: element pairs must be
   (base + i/2, base + i/2 + nelt) with base 0 (low) or nelt/2 (high).  */
48159 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
48161 for (i = 0; i < nelt; i += 2)
48162 if (d->perm[i] != d->perm[0] + i / 2
48163 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
/* Pick the high/low interleave expander for the mode (the selection
   between high and low is decided by elided code around each pair).  */
48173 gen = gen_vec_interleave_highv32qi;
48175 gen = gen_vec_interleave_lowv32qi;
48179 gen = gen_vec_interleave_highv16hi;
48181 gen = gen_vec_interleave_lowv16hi;
48185 gen = gen_vec_interleave_highv8si;
48187 gen = gen_vec_interleave_lowv8si;
48191 gen = gen_vec_interleave_highv4di;
48193 gen = gen_vec_interleave_lowv4di;
48197 gen = gen_vec_interleave_highv8sf;
48199 gen = gen_vec_interleave_lowv8sf;
48203 gen = gen_vec_interleave_highv4df;
48205 gen = gen_vec_interleave_lowv4df;
48208 gcc_unreachable ();
48211 emit_insn (gen (d->target, d->op0, d->op1));
48215 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
48216 a single vector permutation using a single intra-lane vector
48217 permutation, vperm2f128 swapping the lanes and vblend* insn blending
48218 the non-swapped and swapped vectors together. */
/* NOTE(review): elided listing -- interior lines are missing between
   the embedded original line numbers; annotations cover visible lines
   only.  */
48221 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
48223 struct expand_vec_perm_d dfirst, dsecond;
48224 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
48227 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
/* Only one-operand V8SF/V4DF permutations are handled here.  */
48231 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
48232 || !d->one_operand_p)
/* Build dfirst: 0xff marks an as-yet-unassigned slot.  Each source
   element is routed into its own lane position (j keeps i's position
   but forces the lane selected by the element's nelt2 bit); clashes
   abort the strategy.  */
48236 for (i = 0; i < nelt; i++)
48237 dfirst.perm[i] = 0xff;
48238 for (i = 0, msk = 0; i < nelt; i++)
48240 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
48241 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
48243 dfirst.perm[j] = d->perm[i];
/* Fill remaining slots with identity so dfirst is a full permutation.  */
48247 for (i = 0; i < nelt; i++)
48248 if (dfirst.perm[i] == 0xff)
48249 dfirst.perm[i] = i;
48252 dfirst.target = gen_reg_rtx (dfirst.vmode);
48255 ok = expand_vec_perm_1 (&dfirst);
48256 seq = get_insns ();
/* dsecond swaps the two 128-bit lanes of dfirst's result
   (perm[i] = i ^ nelt2).  */
48268 dsecond.op0 = dfirst.target;
48269 dsecond.op1 = dfirst.target;
48270 dsecond.one_operand_p = true;
48271 dsecond.target = gen_reg_rtx (dsecond.vmode);
48272 for (i = 0; i < nelt; i++)
48273 dsecond.perm[i] = i ^ nelt2;
48275 ok = expand_vec_perm_1 (&dsecond);
/* Blend the non-swapped and swapped vectors; msk selects per element
   (msk accumulation lines are elided above -- confirm in full source).  */
48278 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48279 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
48283 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
48284 permutation using two vperm2f128, followed by a vshufpd insn blending
48285 the two vectors together. */
/* NOTE(review): elided listing; interior lines missing between the
   embedded original line numbers.  */
48288 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48290 struct expand_vec_perm_d dfirst, dsecond, dthird;
/* V4DF with AVX only.  */
48293 if (!TARGET_AVX || (d->vmode != V4DFmode))
/* dfirst fetches the even-position sources as aligned pairs
   (perm[k] & ~1 rounds each index down to its pair), dsecond does the
   same for the odd positions.  */
48303 dfirst.perm[0] = (d->perm[0] & ~1);
48304 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48305 dfirst.perm[2] = (d->perm[2] & ~1);
48306 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48307 dsecond.perm[0] = (d->perm[1] & ~1);
48308 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48309 dsecond.perm[2] = (d->perm[3] & ~1);
48310 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
/* dthird is the final vshufpd-style blend: low bit of each original
   index selects within the pair, +4/+2/+6 pick the source vector and
   lane.  */
48311 dthird.perm[0] = (d->perm[0] % 2);
48312 dthird.perm[1] = (d->perm[1] % 2) + 4;
48313 dthird.perm[2] = (d->perm[2] % 2) + 2;
48314 dthird.perm[3] = (d->perm[3] % 2) + 6;
48316 dfirst.target = gen_reg_rtx (dfirst.vmode);
48317 dsecond.target = gen_reg_rtx (dsecond.vmode);
48318 dthird.op0 = dfirst.target;
48319 dthird.op1 = dsecond.target;
48320 dthird.one_operand_p = false;
48322 canonicalize_perm (&dfirst);
48323 canonicalize_perm (&dsecond);
/* All three sub-permutations must be single-insn expandable.  */
48325 ok = expand_vec_perm_1 (&dfirst)
48326 && expand_vec_perm_1 (&dsecond)
48327 && expand_vec_perm_1 (&dthird);
48334 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
48335 permutation with two pshufb insns and an ior. We should have already
48336 failed all two instruction sequences. */
/* NOTE(review): elided listing; interior lines missing between the
   embedded original line numbers.  */
48339 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48341 rtx rperm[2][16], vperm, l, h, op, m128;
48342 unsigned int i, nelt, eltsz;
/* Requires SSSE3 pshufb and a 16-byte vector; two distinct operands.  */
48344 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48346 gcc_assert (!d->one_operand_p);
48352 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48354 /* Generate two permutation masks. If the required element is within
48355 the given vector it is shuffled into the proper lane. If the required
48356 element is in the other vector, force a zero into the lane by setting
48357 bit 7 in the permutation mask. */
48358 m128 = GEN_INT (-128);
48359 for (i = 0; i < nelt; ++i)
48361 unsigned j, e = d->perm[i];
48362 unsigned which = (e >= nelt);
/* Expand each element index into its eltsz byte indices; the mask for
   the non-source operand gets -128 (bit 7) to zero the lane.  */
48366 for (j = 0; j < eltsz; ++j)
48368 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48369 rperm[1-which][i*eltsz + j] = m128;
/* pshufb op0 with mask 0 -> l.  */
48373 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48374 vperm = force_reg (V16QImode, vperm);
48376 l = gen_reg_rtx (V16QImode);
48377 op = gen_lowpart (V16QImode, d->op0);
48378 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
/* pshufb op1 with mask 1 -> h.  */
48380 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48381 vperm = force_reg (V16QImode, vperm);
48383 h = gen_reg_rtx (V16QImode);
48384 op = gen_lowpart (V16QImode, d->op1);
48385 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
/* OR the two halves; move through a V16QI temp when the destination
   mode differs.  */
48388 if (d->vmode != V16QImode)
48389 op = gen_reg_rtx (V16QImode);
48390 emit_insn (gen_iorv16qi3 (op, l, h));
48391 if (op != d->target)
48392 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48397 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
48398 with two vpshufb insns, vpermq and vpor. We should have already failed
48399 all two or three instruction sequences. */
/* NOTE(review): elided listing; interior lines missing between the
   embedded original line numbers.  */
48402 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48404 rtx rperm[2][32], vperm, l, h, hp, op, m128;
48405 unsigned int i, nelt, eltsz;
/* One-operand V32QI/V16HI only (the TARGET_AVX2 test is elided above
   -- presumably required; confirm in full source).  */
48408 || !d->one_operand_p
48409 || (d->vmode != V32QImode && d->vmode != V16HImode))
48416 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48418 /* Generate two permutation masks. If the required element is within
48419 the same lane, it is shuffled in. If the required element from the
48420 other lane, force a zero by setting bit 7 in the permutation mask.
48421 In the other mask the mask has non-negative elements if element
48422 is requested from the other lane, but also moved to the other lane,
48423 so that the result of vpshufb can have the two V2TImode halves
48425 m128 = GEN_INT (-128);
48426 for (i = 0; i < nelt; ++i)
48428 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
/* which is 0 when source and destination are in the same 128-bit
   lane, else the byte offset (16) used to flip lanes in the mask.  */
48429 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48431 for (j = 0; j < eltsz; ++j)
48433 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48434 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
/* Cross-lane mask first: vpshufb op0 -> h.  */
48438 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48439 vperm = force_reg (V32QImode, vperm);
48441 h = gen_reg_rtx (V32QImode);
48442 op = gen_lowpart (V32QImode, d->op0);
48443 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48445 /* Swap the 128-bit lanes of h into hp (vpermq with 2,3,0,1).  */
48446 hp = gen_reg_rtx (V4DImode);
48447 op = gen_lowpart (V4DImode, h);
48448 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
/* Same-lane mask: vpshufb op0 -> l.  */
48451 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48452 vperm = force_reg (V32QImode, vperm);
48454 l = gen_reg_rtx (V32QImode);
48455 op = gen_lowpart (V32QImode, d->op0);
48456 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
/* vpor the two partial results into the target.  */
48459 if (d->vmode != V32QImode)
48460 op = gen_reg_rtx (V32QImode);
48461 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48462 if (op != d->target)
48463 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48468 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48469 and extract-odd permutations of two V32QImode and V16QImode operand
48470 with two vpshufb insns, vpor and vpermq. We should have already
48471 failed all two or three instruction sequences. */
/* NOTE(review): elided listing; interior lines missing between the
   embedded original line numbers.  */
48474 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48476 rtx rperm[2][32], vperm, l, h, ior, op, m128;
48477 unsigned int i, nelt, eltsz;
/* Two-operand V32QI/V16HI only.  */
48480 || d->one_operand_p
48481 || (d->vmode != V32QImode && d->vmode != V16HImode))
/* Verify the permutation really is extract-even/odd: each perm[i]
   must agree with 2*i on the bits covered by 3*nelt/2.  */
48484 for (i = 0; i < d->nelt; ++i)
48485 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48492 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48494 /* Generate two permutation masks. In the first permutation mask
48495 the first quarter will contain indexes for the first half
48496 of the op0, the second quarter will contain bit 7 set, third quarter
48497 will contain indexes for the second half of the op0 and the
48498 last quarter bit 7 set. In the second permutation mask
48499 the first quarter will contain bit 7 set, the second quarter
48500 indexes for the first half of the op1, the third quarter bit 7 set
48501 and last quarter indexes for the second half of the op1.
48502 I.e. the first mask e.g. for V32QImode extract even will be:
48503 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48504 (all values masked with 0xf except for -128) and second mask
48505 for extract even will be
48506 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
48507 m128 = GEN_INT (-128);
48508 for (i = 0; i < nelt; ++i)
48510 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48511 unsigned which = d->perm[i] >= nelt;
/* xorv = 24 swaps the middle quarters so each operand's selected
   bytes land in contiguous quarters for the final vpermq.  */
48512 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48514 for (j = 0; j < eltsz; ++j)
48516 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48517 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
/* vpshufb op0 with mask 0 -> l.  */
48521 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48522 vperm = force_reg (V32QImode, vperm);
48524 l = gen_reg_rtx (V32QImode);
48525 op = gen_lowpart (V32QImode, d->op0);
48526 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
/* vpshufb op1 with mask 1 -> h.  */
48528 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48529 vperm = force_reg (V32QImode, vperm);
48531 h = gen_reg_rtx (V32QImode);
48532 op = gen_lowpart (V32QImode, d->op1);
48533 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
/* Combine with vpor, then reorder the quarters.  */
48535 ior = gen_reg_rtx (V32QImode);
48536 emit_insn (gen_iorv32qi3 (ior, l, h));
48538 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
48539 op = gen_reg_rtx (V4DImode);
48540 ior = gen_lowpart (V4DImode, ior);
48541 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48542 const1_rtx, GEN_INT (3)));
48543 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48548 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48549 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
48550 with two "and" and "pack" or two "shift" and "pack" insns. We should
48551 have already failed all two instruction sequences. */
/* NOTE(review): elided listing; interior lines (including the switch
   on d->vmode and the even/odd branch bodies) are missing between the
   embedded original line numbers.  */
48554 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
48556 rtx op, dop0, dop1, t, rperm[16];
48557 unsigned i, odd, c, s, nelt = d->nelt;
48558 bool end_perm = false;
48559 machine_mode half_mode;
48560 rtx (*gen_and) (rtx, rtx, rtx);
48561 rtx (*gen_pack) (rtx, rtx, rtx);
48562 rtx (*gen_shift) (rtx, rtx, rtx);
48564 if (d->one_operand_p)
/* Select half-width mode plus the and/pack/shift generators per
   vector mode.  V8HI needs SSE4.1 for packusdw.  */
48570 /* Required for "pack". */
48571 if (!TARGET_SSE4_1)
48575 half_mode = V4SImode;
48576 gen_and = gen_andv4si3;
48577 gen_pack = gen_sse4_1_packusdw;
48578 gen_shift = gen_lshrv4si3;
48581 /* No check as all instructions are SSE2. */
48584 half_mode = V8HImode;
48585 gen_and = gen_andv8hi3;
48586 gen_pack = gen_sse2_packuswb;
48587 gen_shift = gen_lshrv8hi3;
48594 half_mode = V8SImode;
48595 gen_and = gen_andv8si3;
48596 gen_pack = gen_avx2_packusdw;
48597 gen_shift = gen_lshrv8si3;
48605 half_mode = V16HImode;
48606 gen_and = gen_andv16hi3;
48607 gen_pack = gen_avx2_packuswb;
48608 gen_shift = gen_lshrv16hi3;
48612 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
48613 general shuffles. */
48617 /* Check that permutation is even or odd. */
48622 for (i = 1; i < nelt; ++i)
48623 if (d->perm[i] != 2 * i + odd)
48629 dop0 = gen_reg_rtx (half_mode);
48630 dop1 = gen_reg_rtx (half_mode);
/* Even extraction: mask the low half of each wide element with a
   constant (c) then pack.  (c/s setup lines are elided -- confirm.)  */
48633 for (i = 0; i < nelt / 2; i++)
48634 rperm[i] = GEN_INT (c);
48635 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
48636 t = force_reg (half_mode, t);
48637 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
48638 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
/* Odd extraction: logical right shift to bring the high half down,
   then pack.  */
48642 emit_insn (gen_shift (dop0,
48643 gen_lowpart (half_mode, d->op0),
48645 emit_insn (gen_shift (dop1,
48646 gen_lowpart (half_mode, d->op1),
48649 /* In AVX2 for 256 bit case we need to permute pack result. */
48650 if (TARGET_AVX2 && end_perm)
48652 op = gen_reg_rtx (d->vmode);
48653 t = gen_reg_rtx (V4DImode);
48654 emit_insn (gen_pack (op, dop0, dop1));
48655 emit_insn (gen_avx2_permv4di_1 (t,
48656 gen_lowpart (V4DImode, op),
48661 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
48664 emit_insn (gen_pack (d->target, dop0, dop1));
48669 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
48670 and extract-odd permutations. */
/* NOTE(review): elided listing -- the switch on d->vmode and many
   branch/return lines are missing between the embedded original line
   numbers; annotations cover visible lines only.  */
48673 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
48675 rtx t1, t2, t3, t4, t5;
/* V4DF: lane shuffle with two vperm2f128 then unpck[lh]pd.  */
48682 t1 = gen_reg_rtx (V4DFmode);
48683 t2 = gen_reg_rtx (V4DFmode);
48685 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48686 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
48687 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
48689 /* Now an unpck[lh]pd will produce the result required. */
48691 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
48693 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
/* V8SF: shufps within lanes, then vperm2f128 across lanes.  */
48699 int mask = odd ? 0xdd : 0x88;
48703 t1 = gen_reg_rtx (V8SFmode);
48704 t2 = gen_reg_rtx (V8SFmode);
48705 t3 = gen_reg_rtx (V8SFmode);
48707 /* Shuffle within the 128-bit lanes to produce:
48708 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
48709 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
48712 /* Shuffle the lanes around to produce:
48713 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
48714 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
48717 /* Shuffle within the 128-bit lanes to produce:
48718 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
48719 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
48721 /* Shuffle within the 128-bit lanes to produce:
48722 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
48723 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
48725 /* Shuffle the lanes around to produce:
48726 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
48727 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
48736 /* These are always directly implementable by expand_vec_perm_1. */
48737 gcc_unreachable ();
/* V8HI: prefer pack-based sequence, then pshufb2, else interleave
   ladder below.  */
48741 return expand_vec_perm_even_odd_pack (d);
48742 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
48743 return expand_vec_perm_pshufb2 (d);
48748 /* We need 2*log2(N)-1 operations to achieve odd/even
48749 with interleave. */
48750 t1 = gen_reg_rtx (V8HImode);
48751 t2 = gen_reg_rtx (V8HImode);
48752 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
48753 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
48754 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
48755 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
48757 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
48759 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
48765 return expand_vec_perm_even_odd_pack (d);
48769 return expand_vec_perm_even_odd_pack (d);
/* V4DI (presumably): retry as V4DF via lowpart punning -- the case
   labels are elided; confirm against full source.  */
48774 struct expand_vec_perm_d d_copy = *d;
48775 d_copy.vmode = V4DFmode;
48777 d_copy.target = gen_lowpart (V4DFmode, d->target);
48779 d_copy.target = gen_reg_rtx (V4DFmode);
48780 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
48781 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
48782 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48785 emit_move_insn (d->target,
48786 gen_lowpart (V4DImode, d_copy.target));
/* AVX2 integer fallback: vperm2ti lane shuffle then vpunpck[lh]qdq.  */
48795 t1 = gen_reg_rtx (V4DImode);
48796 t2 = gen_reg_rtx (V4DImode);
48798 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48799 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
48800 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
48802 /* Now an vpunpck[lh]qdq will produce the result required. */
48804 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
48806 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
/* V8SI (presumably): retry as V8SF via lowpart punning.  */
48813 struct expand_vec_perm_d d_copy = *d;
48814 d_copy.vmode = V8SFmode;
48816 d_copy.target = gen_lowpart (V8SFmode, d->target);
48818 d_copy.target = gen_reg_rtx (V8SFmode);
48819 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
48820 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
48821 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48824 emit_move_insn (d->target,
48825 gen_lowpart (V8SImode, d_copy.target));
/* AVX2 V8SI fallback: vperm2ti, pshufd to swap positions 1 and 2 in
   each lane, then vpunpck[lh]qdq.  */
48834 t1 = gen_reg_rtx (V8SImode);
48835 t2 = gen_reg_rtx (V8SImode);
48836 t3 = gen_reg_rtx (V4DImode);
48837 t4 = gen_reg_rtx (V4DImode);
48838 t5 = gen_reg_rtx (V4DImode);
48840 /* Shuffle the lanes around into
48841 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
48842 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
48843 gen_lowpart (V4DImode, d->op1),
48845 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
48846 gen_lowpart (V4DImode, d->op1),
48849 /* Swap the 2nd and 3rd position in each lane into
48850 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
48851 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
48852 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48853 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
48854 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48856 /* Now an vpunpck[lh]qdq will produce
48857 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
48859 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
48860 gen_lowpart (V4DImode, t2));
48862 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
48863 gen_lowpart (V4DImode, t2));
48865 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
48869 gcc_unreachable ();
48875 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
48876 extract-even and extract-odd permutations. */
/* NOTE(review): elided listing -- the line initializing 'odd'
   (presumably odd = d->perm[0]) is not visible here.  */
48879 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
48881 unsigned i, odd, nelt = d->nelt;
48884 if (odd != 0 && odd != 1)
/* Require perm = { odd, odd+2, odd+4, ... }.  */
48887 for (i = 1; i < nelt; ++i)
48888 if (d->perm[i] != 2 * i + odd)
48891 return expand_vec_perm_even_odd_1 (d, odd);
48894 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
48895 permutations. We assume that expand_vec_perm_1 has already failed. */
/* NOTE(review): elided listing -- the switch labels on vmode and
   several control-flow lines are missing between the embedded
   original line numbers.  */
48898 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
48900 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
48901 machine_mode vmode = d->vmode;
48902 unsigned char perm2[4];
48903 rtx op0 = d->op0, dest;
48910 /* These are special-cased in sse.md so that we can optionally
48911 use the vbroadcast instruction. They expand to two insns
48912 if the input happens to be in a register. */
48913 gcc_unreachable ();
48919 /* These are always implementable using standard shuffle patterns. */
48920 gcc_unreachable ();
48924 /* These can be implemented via interleave. We save one insn by
48925 stopping once we have promoted to V4SImode and then use pshufd. */
/* Widen by repeatedly interleaving the element with itself, choosing
   the low or high interleave depending on which half 'elt' lives in
   (the selecting condition is elided -- confirm).  */
48931 rtx (*gen) (rtx, rtx, rtx)
48932 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
48933 : gen_vec_interleave_lowv8hi;
48937 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
48938 : gen_vec_interleave_highv8hi;
/* Loop: interleave, widen mode, repeat until V4SI, then pshufd.  */
48943 dest = gen_reg_rtx (vmode);
48944 emit_insn (gen (dest, op0, op0));
48945 vmode = get_mode_wider_vector (vmode);
48946 op0 = gen_lowpart (vmode, dest);
48948 while (vmode != V4SImode);
48950 memset (perm2, elt, 4);
48951 dest = gen_reg_rtx (V4SImode);
48952 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
48955 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
48963 /* For AVX2 broadcasts of the first element vpbroadcast* or
48964 vpermq should be used by expand_vec_perm_1. */
48965 gcc_assert (!TARGET_AVX2 || d->perm[0]);
48969 gcc_unreachable ();
48973 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
48974 broadcast permutations. */
/* NOTE(review): elided listing -- the line initializing 'elt'
   (presumably elt = d->perm[0]) is not visible here.  */
48977 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
48979 unsigned i, elt, nelt = d->nelt;
48981 if (!d->one_operand_p)
/* All selector entries must equal the same element index.  */
48985 for (i = 1; i < nelt; ++i)
48986 if (d->perm[i] != elt)
48989 return expand_vec_perm_broadcast_1 (d);
48992 /* Implement arbitrary permutations of two V64QImode operands
48993 will 2 vpermi2w, 2 vpshufb and one vpor instruction. */
/* NOTE(review): elided listing -- interior lines (including testing_p
   handling and the branch separating even/odd byte cases) are missing
   between the embedded original line numbers.  */
48995 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
48997 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
49003 struct expand_vec_perm_d ds[2];
49004 rtx rperm[128], vperm, target0, target1;
49005 unsigned int i, nelt;
49006 machine_mode vmode;
/* Two word-level (V32HI) sub-permutations over the same operands.  */
49011 for (i = 0; i < 2; i++)
49014 ds[i].vmode = V32HImode;
49016 ds[i].target = gen_reg_rtx (V32HImode);
49017 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
49018 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
49021 /* Prepare permutations such that the first one takes care of
49022 putting the even bytes into the right positions or one higher
49023 positions (ds[0]) and the second one takes care of
49024 putting the odd bytes into the right positions or one below
49027 for (i = 0; i < nelt; i++)
49029 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
/* rperm[0..63] is the vpshufb mask for ds[0]'s result,
   rperm[64..127] for ds[1]'s; -1 lanes are zeroed.  */
49032 rperm[i] = constm1_rtx;
49033 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49037 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49038 rperm[i + 64] = constm1_rtx;
/* Expand both word permutations (vpermi2w) and pun back to V64QI.  */
49042 bool ok = expand_vec_perm_1 (&ds[0]);
49044 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
49046 ok = expand_vec_perm_1 (&ds[1]);
49048 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
/* vpshufb each intermediate with its byte mask, then vpor.  */
49050 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
49051 vperm = force_reg (vmode, vperm);
49052 target0 = gen_reg_rtx (V64QImode);
49053 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
49055 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
49056 vperm = force_reg (vmode, vperm);
49057 target1 = gen_reg_rtx (V64QImode);
49058 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
49060 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
49064 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
49065 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
49066 all the shorter instruction sequences. */
/* NOTE(review): elided listing; interior lines missing between the
   embedded original line numbers.  */
49069 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
49071 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
49072 unsigned int i, nelt, eltsz;
/* Two-operand V32QI/V16HI only.  */
49076 || d->one_operand_p
49077 || (d->vmode != V32QImode && d->vmode != V16HImode))
49084 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
49086 /* Generate 4 permutation masks. If the required element is within
49087 the same lane, it is shuffled in. If the required element from the
49088 other lane, force a zero by setting bit 7 in the permutation mask.
49089 In the other mask the mask has non-negative elements if element
49090 is requested from the other lane, but also moved to the other lane,
49091 so that the result of vpshufb can have the two V2TImode halves
/* Initialize all four masks to "zero this byte" (-128).  */
49093 m128 = GEN_INT (-128);
49094 for (i = 0; i < 32; ++i)
49096 rperm[0][i] = m128;
49097 rperm[1][i] = m128;
49098 rperm[2][i] = m128;
49099 rperm[3][i] = m128;
/* which: bit 1 selects op1 vs op0, bit 0 selects the cross-lane mask
   variant.  Mark each mask actually used.  */
49105 for (i = 0; i < nelt; ++i)
49107 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
49108 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
49109 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
49111 for (j = 0; j < eltsz; ++j)
49112 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
49113 used[which] = true;
/* Cross-lane vpshufb per operand (skip when the mask is unused).  */
49116 for (i = 0; i < 2; ++i)
49118 if (!used[2 * i + 1])
49123 vperm = gen_rtx_CONST_VECTOR (V32QImode,
49124 gen_rtvec_v (32, rperm[2 * i + 1]));
49125 vperm = force_reg (V32QImode, vperm);
49126 h[i] = gen_reg_rtx (V32QImode);
49127 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49128 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
49131 /* Swap the 128-byte lanes of h[X]. */
49132 for (i = 0; i < 2; ++i)
49134 if (h[i] == NULL_RTX)
49136 op = gen_reg_rtx (V4DImode);
49137 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
49138 const2_rtx, GEN_INT (3), const0_rtx,
49140 h[i] = gen_lowpart (V32QImode, op);
/* Same-lane vpshufb per operand.  */
49143 for (i = 0; i < 2; ++i)
49150 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
49151 vperm = force_reg (V32QImode, vperm);
49152 l[i] = gen_reg_rtx (V32QImode);
49153 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49154 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
/* Combine each operand's same-lane/cross-lane halves with vpor.  */
49157 for (i = 0; i < 2; ++i)
49161 op = gen_reg_rtx (V32QImode);
49162 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
/* Final vpor of the two per-operand results into the target.  */
49169 gcc_assert (l[0] && l[1]);
49171 if (d->vmode != V32QImode)
49172 op = gen_reg_rtx (V32QImode);
49173 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
49174 if (op != d->target)
49175 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49179 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
49180 With all of the interface bits taken care of, perform the expansion
49181 in D and return true on success. */
/* Strategies are tried strictly from cheapest (one insn) to most
   expensive; each expand_vec_perm_* returns true and emits insns on
   success.  (Elided listing: the 'return true' lines after each call
   are not visible here.)  */
49184 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
49186 /* Try a single instruction expansion. */
49187 if (expand_vec_perm_1 (d))
49190 /* Try sequences of two instructions. */
49192 if (expand_vec_perm_pshuflw_pshufhw (d))
49195 if (expand_vec_perm_palignr (d, false))
49198 if (expand_vec_perm_interleave2 (d))
49201 if (expand_vec_perm_broadcast (d))
49204 if (expand_vec_perm_vpermq_perm_1 (d))
49207 if (expand_vec_perm_vperm2f128 (d))
49210 if (expand_vec_perm_pblendv (d))
49213 /* Try sequences of three instructions. */
49215 if (expand_vec_perm_even_odd_pack (d))
49218 if (expand_vec_perm_2vperm2f128_vshuf (d))
49221 if (expand_vec_perm_pshufb2 (d))
49224 if (expand_vec_perm_interleave3 (d))
49227 if (expand_vec_perm_vperm2f128_vblend (d))
49230 /* Try sequences of four instructions. */
49232 if (expand_vec_perm_vpshufb2_vpermq (d))
49235 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
49238 if (expand_vec_perm_vpermi2_vpshub2 (d))
49241 /* ??? Look for narrow permutations whose element orderings would
49242 allow the promotion to a wider mode. */
49244 /* ??? Look for sequences of interleave or a wider permute that place
49245 the data into the correct lanes for a half-vector shuffle like
49246 pshuf[lh]w or vpermilps. */
49248 /* ??? Look for sequences of interleave that produce the desired results.
49249 The combinatorics of punpck[lh] get pretty ugly... */
49251 if (expand_vec_perm_even_odd (d))
49254 /* Even longer sequences. */
49255 if (expand_vec_perm_vpshufb4_vpermq2 (d))
49261 /* If a permutation only uses one operand, make it clear. Returns true
49262 if the permutation references both operands. */
49265 canonicalize_perm (struct expand_vec_perm_d *d)
49267 int i, which, nelt = d->nelt;
/* which: bit 0 set if any index selects op0 (< nelt), bit 1 set if
   any selects op1 (>= nelt).  */
49269 for (i = which = 0; i < nelt; ++i)
49270 which |= (d->perm[i] < nelt ? 1 : 2);
49272 d->one_operand_p = true;
/* Both operands referenced but they are distinct rtx: genuinely a
   two-operand permutation.  */
49279 if (!rtx_equal_p (d->op0, d->op1))
49281 d->one_operand_p = false;
49284 /* The elements of PERM do not suggest that only the first operand
49285 is used, but both operands are identical. Allow easier matching
49286 of the permutation by folding the permutation into the single
/* Fold all indices into the first operand's range.  */
49291 for (i = 0; i < nelt; ++i)
49292 d->perm[i] &= nelt - 1;
49301 return (which == 3);
/* Expand a vec_perm_const pattern: operands[0] = target,
   operands[1..2] = inputs, operands[3] = CONST_VECTOR selector.
   (Elided listing: the lines reading 'sel' from operands[3] and the
   return statements are not visible here.)  */
49305 ix86_expand_vec_perm_const (rtx operands[4])
49307 struct expand_vec_perm_d d;
49308 unsigned char perm[MAX_VECT_LEN];
49313 d.target = operands[0];
49314 d.op0 = operands[1];
49315 d.op1 = operands[2];
49318 d.vmode = GET_MODE (d.target);
49319 gcc_assert (VECTOR_MODE_P (d.vmode));
49320 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49321 d.testing_p = false;
49323 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
49324 gcc_assert (XVECLEN (sel, 0) == nelt);
49325 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
/* Copy the selector, wrapping each index into [0, 2*nelt).  */
49327 for (i = 0; i < nelt; ++i)
49329 rtx e = XVECEXP (sel, 0, i);
49330 int ei = INTVAL (e) & (2 * nelt - 1);
49335 two_args = canonicalize_perm (&d);
49337 if (ix86_expand_vec_perm_const_1 (&d))
49340 /* If the selector says both arguments are needed, but the operands are the
49341 same, the above tried to expand with one_operand_p and flattened selector.
49342 If that didn't work, retry without one_operand_p; we succeeded with that
/* Retry with the saved, unflattened selector.  */
49344 if (two_args && d.one_operand_p)
49346 d.one_operand_p = false;
49347 memcpy (d.perm, perm, sizeof (perm));
49348 return ix86_expand_vec_perm_const_1 (&d);
49354 /* Implement targetm.vectorize.vec_perm_const_ok. */
/* NOTE(review): elided listing -- the switch labels on vmode and the
   'return true' lines after each ISA comment are not visible between
   the embedded original line numbers.  */
49357 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
49358 const unsigned char *sel)
49360 struct expand_vec_perm_d d;
49361 unsigned int i, nelt, which;
49365 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49366 d.testing_p = true;
49368 /* Given sufficient ISA support we can just return true here
49369 for selected vector modes. */
49376 if (TARGET_AVX512F)
49377 /* All implementable with a single vpermi2 insn. */
49381 if (TARGET_AVX512BW)
49382 /* All implementable with a single vpermi2 insn. */
49386 if (TARGET_AVX512BW)
49387 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
49394 if (TARGET_AVX512VL)
49395 /* All implementable with a single vpermi2 insn. */
49400 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49405 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49412 /* All implementable with a single vpperm insn. */
49415 /* All implementable with 2 pshufb + 1 ior. */
49421 /* All implementable with shufpd or unpck[lh]pd. */
49427 /* Extract the values from the vector CST into the permutation
49429 memcpy (d.perm, sel, nelt);
/* Track which operands the selector references (bit 0: first,
   bit 1: second).  */
49430 for (i = which = 0; i < nelt; ++i)
49432 unsigned char e = d.perm[i];
49433 gcc_assert (e < 2 * nelt);
49434 which |= (e < nelt ? 1 : 2);
49437 /* For all elements from second vector, fold the elements to first. */
49439 for (i = 0; i < nelt; ++i)
49442 /* Check whether the mask can be applied to the vector type. */
49443 d.one_operand_p = (which != 3);
49445 /* Implementable with shufps or pshufd. */
49446 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49449 /* Otherwise we have to go through the motions and see if we can
49450 figure out how to generate the requested permutation. */
/* Use raw virtual registers so no real insns are emitted while
   testing (d.testing_p is set above).  */
49451 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49452 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49453 if (!d.one_operand_p)
49454 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49457 ret = ix86_expand_vec_perm_const_1 (&d);
/* Expand an extract-even (ODD == 0) or extract-odd (ODD == 1)
   permutation of OP0/OP1 into TARG.  (Elided listing: the lines
   assigning d.target/d.op0/d.op1 from the arguments are not visible
   here.)  */
49464 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49466 struct expand_vec_perm_d d;
49472 d.vmode = GET_MODE (targ);
49473 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49474 d.one_operand_p = false;
49475 d.testing_p = false;
/* Selector { odd, odd+2, odd+4, ... }.  */
49477 for (i = 0; i < nelt; ++i)
49478 d.perm[i] = i * 2 + odd;
49480 /* We'll either be able to implement the permutation directly... */
49481 if (expand_vec_perm_1 (&d))
49484 /* ... or we use the special-case patterns. */
49485 expand_vec_perm_even_odd_1 (&d, odd);
/* Expand an interleave-low (HIGH_P == false) or interleave-high
   (HIGH_P == true) of OP0/OP1 into TARG.  (Elided listing: the lines
   assigning d.target/d.op0/d.op1 and checking 'ok' are not visible
   here.)  */
49489 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49491 struct expand_vec_perm_d d;
49492 unsigned i, nelt, base;
49498 d.vmode = GET_MODE (targ);
49499 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49500 d.one_operand_p = false;
49501 d.testing_p = false;
/* Selector { base, base+nelt, base+1, base+1+nelt, ... } with base
   = nelt/2 for the high half.  */
49503 base = high_p ? nelt / 2 : 0;
49504 for (i = 0; i < nelt / 2; ++i)
49506 d.perm[i * 2] = i + base;
49507 d.perm[i * 2 + 1] = i + base + nelt;
49510 /* Note that for AVX this isn't one instruction. */
49511 ok = ix86_expand_vec_perm_const_1 (&d);
49516 /* Expand a vector operation CODE for a V*QImode in terms of the
49517 same operation on V*HImode. */
/* NOTE(review): elided listing -- the switch labels on qimode/code
   and several branch lines are missing between the embedded original
   line numbers.  */
49520 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49522 machine_mode qimode = GET_MODE (dest);
49523 machine_mode himode;
49524 rtx (*gen_il) (rtx, rtx, rtx);
49525 rtx (*gen_ih) (rtx, rtx, rtx);
49526 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49527 struct expand_vec_perm_d d;
49528 bool ok, full_interleave;
49529 bool uns_p = false;
/* Pick the HImode twin and low/high interleave generators for the
   byte mode (the case labels are elided; V16QI path sets himode
   before these lines -- confirm in full source).  */
49536 gen_il = gen_vec_interleave_lowv16qi;
49537 gen_ih = gen_vec_interleave_highv16qi;
49540 himode = V16HImode;
49541 gen_il = gen_avx2_interleave_lowv32qi;
49542 gen_ih = gen_avx2_interleave_highv32qi;
49545 himode = V32HImode;
49546 gen_il = gen_avx512bw_interleave_lowv64qi;
49547 gen_ih = gen_avx512bw_interleave_highv64qi;
49550 gcc_unreachable ();
49553 op2_l = op2_h = op2;
49557 /* Unpack data such that we've got a source byte in each low byte of
49558 each word. We don't care what goes into the high byte of each word.
49559 Rather than trying to get zero in there, most convenient is to let
49560 it be a copy of the low byte. */
49561 op2_l = gen_reg_rtx (qimode);
49562 op2_h = gen_reg_rtx (qimode);
49563 emit_insn (gen_il (op2_l, op2, op2));
49564 emit_insn (gen_ih (op2_h, op2, op2));
49567 op1_l = gen_reg_rtx (qimode);
49568 op1_h = gen_reg_rtx (qimode);
49569 emit_insn (gen_il (op1_l, op1, op1));
49570 emit_insn (gen_ih (op1_h, op1, op1));
49571 full_interleave = qimode == V16QImode;
/* Alternative path (presumably for shifts): widen via sse_unpack
   instead of self-interleave.  */
49579 op1_l = gen_reg_rtx (himode);
49580 op1_h = gen_reg_rtx (himode);
49581 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
49582 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
49583 full_interleave = true;
49586 gcc_unreachable ();
49589 /* Perform the operation. */
49590 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
49592 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
49594 gcc_assert (res_l && res_h);
49596 /* Merge the data back into the right place. */
49598 d.op0 = gen_lowpart (qimode, res_l);
49599 d.op1 = gen_lowpart (qimode, res_h);
49601 d.nelt = GET_MODE_NUNITS (qimode);
49602 d.one_operand_p = false;
49603 d.testing_p = false;
49605 if (full_interleave)
49607 /* For SSE2, we used an full interleave, so the desired
49608 results are in the even elements. */
49609 for (i = 0; i < 64; ++i)
49614 /* For AVX, the interleave used above was not cross-lane. So the
49615 extraction is evens but with the second and third quarter swapped.
49616 Happily, that is even one insn shorter than even extraction. */
49617 for (i = 0; i < 64; ++i)
49618 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
49621 ok = ix86_expand_vec_perm_const_1 (&d);
/* Attach a REG_EQUAL note so later passes can see the high-level op.  */
49624 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49625 gen_rtx_fmt_ee (code, qimode, op1, op2));
49628 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
49629 if op is CONST_VECTOR with all odd elements equal to their
49630 preceding element. */
49633 const_vector_equal_evenodd_p (rtx op)
49635 machine_mode mode = GET_MODE (op);
49636 int i, nunits = GET_MODE_NUNITS (mode);
/* Non-CONST_VECTOR operands (or count mismatch) do not qualify.  */
49637 if (GET_CODE (op) != CONST_VECTOR
49638 || nunits != CONST_VECTOR_NUNITS (op))
/* Check each even/odd pair for shared-rtx equality.  */
49640 for (i = 0; i < nunits; i += 2)
49641 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
/* Expand a widening multiply of the even (ODD_P == false) or odd
   (ODD_P == true) SImode elements of OP1 and OP2 into the double-wide
   vector DEST; UNS_P selects unsigned multiplication.
   NOTE(review): some original lines are elided in this excerpt.  */
49647 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
49648 bool uns_p, bool odd_p)
49650 machine_mode mode = GET_MODE (op1);
49651 machine_mode wmode = GET_MODE (dest);
49653 rtx orig_op1 = op1, orig_op2 = op2;
49655 if (!nonimmediate_operand (op1, mode))
49656 op1 = force_reg (mode, op1);
49657 if (!nonimmediate_operand (op2, mode))
49658 op2 = force_reg (mode, op2);
49660 /* We only play even/odd games with vectors of SImode. */
49661 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
49663 /* If we're looking for the odd results, shift those members down to
49664 the even slots. For some cpus this is faster than a PSHUFD. */
49667 /* For XOP use vpmacsdqh, but only for smult, as it is only
49669 if (TARGET_XOP && mode == V4SImode && !uns_p)
49671 x = force_reg (wmode, CONST0_RTX (wmode));
49672 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
/* Logical right shift by one element width moves odd lanes to even
   slots; skip the shift when every odd element equals the preceding
   even element (the shift would be a no-op for the multiply).  */
49676 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
49677 if (!const_vector_equal_evenodd_p (orig_op1))
49678 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
49679 x, NULL, 1, OPTAB_DIRECT);
49680 if (!const_vector_equal_evenodd_p (orig_op2))
49681 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
49682 x, NULL, 1, OPTAB_DIRECT);
49683 op1 = gen_lowpart (mode, op1);
49684 op2 = gen_lowpart (mode, op2);
/* Use the widest even-multiply pattern available for the mode.  */
49687 if (mode == V16SImode)
49690 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
49692 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
49694 else if (mode == V8SImode)
49697 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
49699 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
49702 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
49703 else if (TARGET_SSE4_1)
49704 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
49707 rtx s1, s2, t0, t1, t2;
49709 /* The easiest way to implement this without PMULDQ is to go through
49710 the motions as if we are performing a full 64-bit multiply. With
49711 the exception that we need to do less shuffling of the elements. */
49713 /* Compute the sign-extension, aka highparts, of the two operands. */
49714 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49715 op1, pc_rtx, pc_rtx);
49716 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49717 op2, pc_rtx, pc_rtx);
49719 /* Multiply LO(A) * HI(B), and vice-versa. */
49720 t1 = gen_reg_rtx (wmode);
49721 t2 = gen_reg_rtx (wmode);
49722 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
49723 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
49725 /* Multiply LO(A) * LO(B). */
49726 t0 = gen_reg_rtx (wmode);
49727 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
49729 /* Combine and shift the highparts into place. */
49730 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
49731 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
49734 /* Combine high and low parts. */
49735 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
/* Expand a widening multiply of the low (HIGH_P == false) or high
   halves of OP1 and OP2 into DEST; UNS_P selects unsigned multiply.
   NOTE(review): the mode switch heads are elided in this excerpt, so
   the visible arms (V4SI/V8SI, V8HI-style highpart, unpack+mult) are
   presumably selected by operand mode — confirm against full source.  */
49742 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
49743 bool uns_p, bool high_p)
49745 machine_mode wmode = GET_MODE (dest);
49746 machine_mode mode = GET_MODE (op1);
49747 rtx t1, t2, t3, t4, mask;
49752 t1 = gen_reg_rtx (mode);
49753 t2 = gen_reg_rtx (mode);
49754 if (TARGET_XOP && !uns_p)
49756 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
49757 shuffle the elements once so that all elements are in the right
49758 place for immediate use: { A C B D }. */
49759 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
49760 const1_rtx, GEN_INT (3)));
49761 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
49762 const1_rtx, GEN_INT (3)));
49766 /* Put the elements into place for the multiply. */
49767 ix86_expand_vec_interleave (t1, op1, op1, high_p);
49768 ix86_expand_vec_interleave (t2, op2, op2, high_p);
49771 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
49775 /* Shuffle the elements between the lanes. After this we
49776 have { A B E F | C D G H } for each operand. */
49777 t1 = gen_reg_rtx (V4DImode);
49778 t2 = gen_reg_rtx (V4DImode);
49779 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
49780 const0_rtx, const2_rtx,
49781 const1_rtx, GEN_INT (3)));
49782 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
49783 const0_rtx, const2_rtx,
49784 const1_rtx, GEN_INT (3)));
49786 /* Shuffle the elements within the lanes. After this we
49787 have { A A B B | C C D D } or { E E F F | G G H H }. */
49788 t3 = gen_reg_rtx (V8SImode);
49789 t4 = gen_reg_rtx (V8SImode);
/* PSHUFD immediate: duplicate elements 2,2,3,3 (high) or 0,0,1,1 (low).  */
49790 mask = GEN_INT (high_p
49791 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
49792 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
49793 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
49794 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
49796 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
/* Highpart path: compute low and high products separately, then
   interleave the halves to form the widened results.  */
49801 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
49802 uns_p, OPTAB_DIRECT);
49803 t2 = expand_binop (mode,
49804 uns_p ? umul_highpart_optab : smul_highpart_optab,
49805 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
49806 gcc_assert (t1 && t2);
49808 t3 = gen_reg_rtx (mode);
49809 ix86_expand_vec_interleave (t3, t1, t2, high_p);
49810 emit_move_insn (dest, gen_lowpart (wmode, t3));
/* Unpack path: widen both operands, then multiply in the wide mode.  */
49818 t1 = gen_reg_rtx (wmode);
49819 t2 = gen_reg_rtx (wmode);
49820 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
49821 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
49823 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
49827 gcc_unreachable ();
/* Expand OP0 = OP1 * OP2 for V4SImode using SSE2 even/odd widening
   multiplies, then merge the low halves of the V2DI products back into
   a V4SI result.  NOTE(review): some lines are elided in this excerpt.  */
49832 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
49834 rtx res_1, res_2, res_3, res_4;
49836 res_1 = gen_reg_rtx (V4SImode);
49837 res_2 = gen_reg_rtx (V4SImode);
49838 res_3 = gen_reg_rtx (V2DImode);
49839 res_4 = gen_reg_rtx (V2DImode);
49840 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
49841 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
49843 /* Move the results in element 2 down to element 1; we don't care
49844 what goes in elements 2 and 3. Then we can merge the parts
49845 back together with an interleave.
49847 Note that two other sequences were tried:
49848 (1) Use interleaves at the start instead of psrldq, which allows
49849 us to use a single shufps to merge things back at the end.
49850 (2) Use shufps here to combine the two vectors, then pshufd to
49851 put the elements in the correct order.
49852 In both cases the cost of the reformatting stall was too high
49853 and the overall sequence slower. */
49855 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
49856 const0_rtx, const2_rtx,
49857 const0_rtx, const0_rtx));
49858 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
49859 const0_rtx, const2_rtx,
49860 const0_rtx, const0_rtx));
49861 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
/* Record the overall multiply for later RTL passes.  */
49863 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
/* Expand OP0 = OP1 * OP2 for V2DI/V4DI/V8DI.  Uses the native DImode
   multiply when AVX-512DQ (+VL for the narrower modes) is available, a
   dedicated XOP sequence for V2DI, and otherwise synthesizes the 64-bit
   multiply from 32x32->64 even multiplies plus shifts and adds.
   NOTE(review): some original lines are elided in this excerpt.  */
49867 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
49869 machine_mode mode = GET_MODE (op0);
49870 rtx t1, t2, t3, t4, t5, t6;
49872 if (TARGET_AVX512DQ && mode == V8DImode)
49873 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
49874 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
49875 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
49876 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
49877 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
49878 else if (TARGET_XOP && mode == V2DImode)
49880 /* op1: A,B,C,D, op2: E,F,G,H */
49881 op1 = gen_lowpart (V4SImode, op1);
49882 op2 = gen_lowpart (V4SImode, op2);
49884 t1 = gen_reg_rtx (V4SImode);
49885 t2 = gen_reg_rtx (V4SImode);
49886 t3 = gen_reg_rtx (V2DImode);
49887 t4 = gen_reg_rtx (V2DImode);
49890 emit_insn (gen_sse2_pshufd_1 (t1, op1,
49896 /* t2: (B*E),(A*F),(D*G),(C*H) */
49897 emit_insn (gen_mulv4si3 (t2, t1, op2));
49899 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
49900 emit_insn (gen_xop_phadddq (t3, t2));
49902 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
49903 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
49905 /* Multiply lower parts and add all */
49906 t5 = gen_reg_rtx (V2DImode);
49907 emit_insn (gen_vec_widen_umult_even_v4si (t5,
49908 gen_lowpart (V4SImode, op1),
49909 gen_lowpart (V4SImode, op2)));
49910 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
/* Generic fallback: pick the SImode even-multiply pattern matching
   the vector width.  */
49915 machine_mode nmode;
49916 rtx (*umul) (rtx, rtx, rtx);
49918 if (mode == V2DImode)
49920 umul = gen_vec_widen_umult_even_v4si;
49923 else if (mode == V4DImode)
49925 umul = gen_vec_widen_umult_even_v8si;
49928 else if (mode == V8DImode)
49930 umul = gen_vec_widen_umult_even_v16si;
49934 gcc_unreachable ();
49937 /* Multiply low parts. */
49938 t1 = gen_reg_rtx (mode);
49939 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
49941 /* Shift input vectors right 32 bits so we can multiply high parts. */
49943 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
49944 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
49946 /* Multiply high parts by low parts. */
49947 t4 = gen_reg_rtx (mode);
49948 t5 = gen_reg_rtx (mode);
49949 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
49950 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
49952 /* Combine and shift the highparts back. */
49953 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
49954 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
49956 /* Combine high and low parts. */
49957 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
/* Record the overall multiply for later RTL passes.  */
49960 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49961 gen_rtx_MULT (mode, op1, op2));
49964 /* Return 1 if control transfer instruction INSN
49965 should be encoded with bnd prefix.
49966 If insn is NULL then return 1 when control
49967 transfer instructions should be prefixed with
49968 bnd by default for current function. */
49971 ix86_bnd_prefixed_insn_p (rtx insn)
49973 /* For call insns check special flag. */
49974 if (insn && CALL_P (insn))
49976 rtx call = get_call_rtx_from (insn);
49978 return CALL_EXPR_WITH_BOUNDS_P (call);
49981 /* All other insns are prefixed only if function is instrumented. */
49982 return chkp_function_instrumented_p (current_function_decl);
49985 /* Calculate integer abs() using only SSE2 instructions.
   TARGET receives |INPUT|; the strategy depends on the element width.
   NOTE(review): the mode switch heads are elided in this excerpt.  */
49988 ix86_expand_sse2_abs (rtx target, rtx input)
49990 machine_mode mode = GET_MODE (target);
49995 /* For 32-bit signed integer X, the best way to calculate the absolute
49996 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
49998 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
49999 GEN_INT (GET_MODE_BITSIZE
50000 (GET_MODE_INNER (mode)) - 1),
50001 NULL, 0, OPTAB_DIRECT);
50002 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
50003 NULL, 0, OPTAB_DIRECT);
50004 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
50005 target, 0, OPTAB_DIRECT);
50008 /* For 16-bit signed integer X, the best way to calculate the absolute
50009 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
50011 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50013 x = expand_simple_binop (mode, SMAX, tmp0, input,
50014 target, 0, OPTAB_DIRECT);
50017 /* For 8-bit signed integer X, the best way to calculate the absolute
50018 value of X is min ((unsigned char) X, (unsigned char) (-X)),
50019 as SSE2 provides the PMINUB insn. */
50021 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50023 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
50024 target, 0, OPTAB_DIRECT);
50028 gcc_unreachable ();
/* The binop may have expanded into a different register.  */
50032 emit_move_insn (target, x);
50035 /* Expand an insert into a vector register through pinsr insn.
50036 Return true if successful.
   operands[0] = destination, operands[3] = source value,
   operands[1] = field size in bits, operands[2] = bit position.
   NOTE(review): some switch heads/returns are elided in this excerpt.  */
50039 ix86_expand_pinsr (rtx *operands)
50041 rtx dst = operands[0];
50042 rtx src = operands[3];
50044 unsigned int size = INTVAL (operands[1]);
50045 unsigned int pos = INTVAL (operands[2]);
/* Fold a SUBREG destination into an adjusted bit position.  */
50047 if (GET_CODE (dst) == SUBREG)
50049 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
50050 dst = SUBREG_REG (dst);
50053 if (GET_CODE (src) == SUBREG)
50054 src = SUBREG_REG (src);
50056 switch (GET_MODE (dst))
50063 machine_mode srcmode, dstmode;
50064 rtx (*pinsr)(rtx, rtx, rtx, rtx);
50066 srcmode = mode_for_size (size, MODE_INT, 0);
/* Byte inserts need SSE4.1 (PINSRB).  */
50071 if (!TARGET_SSE4_1)
50073 dstmode = V16QImode;
50074 pinsr = gen_sse4_1_pinsrb;
/* Word inserts use the SSE2 PINSRW.  */
50080 dstmode = V8HImode;
50081 pinsr = gen_sse2_pinsrw;
/* Dword inserts need SSE4.1 (PINSRD).  */
50085 if (!TARGET_SSE4_1)
50087 dstmode = V4SImode;
50088 pinsr = gen_sse4_1_pinsrd;
/* Qword inserts need 64-bit mode and SSE4.1 (PINSRQ).  */
50092 gcc_assert (TARGET_64BIT);
50093 if (!TARGET_SSE4_1)
50095 dstmode = V2DImode;
50096 pinsr = gen_sse4_1_pinsrq;
50104 if (GET_MODE (dst) != dstmode)
50105 d = gen_reg_rtx (dstmode);
50106 src = gen_lowpart (srcmode, src);
/* POS is converted to the element index via the selector bitmask.  */
50110 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
50111 GEN_INT (1 << pos)));
50113 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
50122 /* This function returns the calling abi specific va_list type node.
50123 It returns the FNDECL specific va_list type.
   MS-ABI functions get ms_va_list_type_node, others the SysV node.  */
50126 ix86_fn_abi_va_list (tree fndecl)
50129 return va_list_type_node;
50130 gcc_assert (fndecl != NULL_TREE);
50132 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
50133 return ms_va_list_type_node;
50135 return sysv_va_list_type_node;
50138 /* Returns the canonical va_list type specified by TYPE. If there
50139 is no valid TYPE provided, it return NULL_TREE.
   Tries, in order: the generic va_list, the SysV va_list, and the MS
   va_list, unwrapping array decay on both sides before comparing
   main variants.  NOTE(review): htype initialization is elided in
   this excerpt — presumably htype starts as TYPE.  */
50142 ix86_canonical_va_list_type (tree type)
50146 /* Resolve references and pointers to va_list type. */
50147 if (TREE_CODE (type) == MEM_REF)
50148 type = TREE_TYPE (type);
50149 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
50150 type = TREE_TYPE (type);
50151 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
50152 type = TREE_TYPE (type);
50154 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
50156 wtype = va_list_type_node;
50157 gcc_assert (wtype != NULL_TREE);
50159 if (TREE_CODE (wtype) == ARRAY_TYPE)
50161 /* If va_list is an array type, the argument may have decayed
50162 to a pointer type, e.g. by being passed to another function.
50163 In that case, unwrap both types so that we can compare the
50164 underlying records. */
50165 if (TREE_CODE (htype) == ARRAY_TYPE
50166 || POINTER_TYPE_P (htype))
50168 wtype = TREE_TYPE (wtype);
50169 htype = TREE_TYPE (htype);
50172 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50173 return va_list_type_node;
/* Second candidate: the SysV va_list.  */
50174 wtype = sysv_va_list_type_node;
50175 gcc_assert (wtype != NULL_TREE);
50177 if (TREE_CODE (wtype) == ARRAY_TYPE)
50179 /* If va_list is an array type, the argument may have decayed
50180 to a pointer type, e.g. by being passed to another function.
50181 In that case, unwrap both types so that we can compare the
50182 underlying records. */
50183 if (TREE_CODE (htype) == ARRAY_TYPE
50184 || POINTER_TYPE_P (htype))
50186 wtype = TREE_TYPE (wtype);
50187 htype = TREE_TYPE (htype);
50190 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50191 return sysv_va_list_type_node;
/* Third candidate: the MS va_list.  */
50192 wtype = ms_va_list_type_node;
50193 gcc_assert (wtype != NULL_TREE);
50195 if (TREE_CODE (wtype) == ARRAY_TYPE)
50197 /* If va_list is an array type, the argument may have decayed
50198 to a pointer type, e.g. by being passed to another function.
50199 In that case, unwrap both types so that we can compare the
50200 underlying records. */
50201 if (TREE_CODE (htype) == ARRAY_TYPE
50202 || POINTER_TYPE_P (htype))
50204 wtype = TREE_TYPE (wtype);
50205 htype = TREE_TYPE (htype);
50208 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50209 return ms_va_list_type_node;
/* No target-specific match; defer to the generic handling.  */
50212 return std_canonical_va_list_type (type);
50215 /* Iterate through the target-specific builtin types for va_list.
50216 IDX denotes the iterator, *PTREE is set to the result type of
50217 the va_list builtin, and *PNAME to its internal type.
50218 Returns zero if there is no element for this index, otherwise
50219 IDX should be increased upon the next call.
50220 Note, do not iterate a base builtin's name like __builtin_va_list.
50221 Used from c_common_nodes_and_builtins.
   NOTE(review): the IDX dispatch (switch/if heads) is elided here.  */
50224 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
50234 *ptree = ms_va_list_type_node;
50235 *pname = "__builtin_ms_va_list";
50239 *ptree = sysv_va_list_type_node;
50240 *pname = "__builtin_sysv_va_list";
/* Target scheduling hooks and size limits for the dispatch-window
   scheduler implemented below.  */
50248 #undef TARGET_SCHED_DISPATCH
50249 #define TARGET_SCHED_DISPATCH has_dispatch
50250 #undef TARGET_SCHED_DISPATCH_DO
50251 #define TARGET_SCHED_DISPATCH_DO do_dispatch
50252 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50253 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50254 #undef TARGET_SCHED_REORDER
50255 #define TARGET_SCHED_REORDER ix86_sched_reorder
50256 #undef TARGET_SCHED_ADJUST_PRIORITY
50257 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50258 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50259 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50260 ix86_dependencies_evaluation_hook
50262 /* The size of the dispatch window is the total number of bytes of
50263 object code allowed in a window. */
50264 #define DISPATCH_WINDOW_SIZE 16
50266 /* Number of dispatch windows considered for scheduling. */
50267 #define MAX_DISPATCH_WINDOWS 3
50269 /* Maximum number of instructions in a window. */
50272 /* Maximum number of immediate operands in a window. */
50275 /* Maximum number of immediate bits allowed in a window. */
50276 #define MAX_IMM_SIZE 128
50278 /* Maximum number of 32 bit immediates allowed in a window. */
50279 #define MAX_IMM_32 4
50281 /* Maximum number of 64 bit immediates allowed in a window. */
50282 #define MAX_IMM_64 2
50284 /* Maximum total of loads or prefetches allowed in a window. */
50287 /* Maximum total of stores allowed in a window. */
50288 #define MAX_STORE 1
50294 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
50295 enum dispatch_group {
50310 /* Number of allowable groups in a dispatch window. It is an array
50311 indexed by dispatch_group enum. 100 is used as a big number,
50312 because the number of these kind of operations does not have any
50313 effect in dispatch window, but we need them for other reasons in
50315 static unsigned int num_allowable_groups[disp_last] = {
50316 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
/* Human-readable names for each dispatch_group, used for dumps.  */
50319 char group_name[disp_last + 1][16] = {
50320 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50321 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50322 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50325 /* Instruction path. */
50328 path_single, /* Single micro op. */
50329 path_double, /* Double micro op. */
50330 path_multi, /* Instructions with more than 2 micro op.. */
50334 /* sched_insn_info defines a window to the instructions scheduled in
50335 the basic block. It contains a pointer to the insn_info table and
50336 the instruction scheduled.
50338 Windows are allocated for each basic block and are linked
50340 typedef struct sched_insn_info_s {
50342 enum dispatch_group group;
50343 enum insn_path path;
50348 /* Linked list of dispatch windows. This is a two way list of
50349 dispatch windows of a basic block. It contains information about
50350 the number of uops in the window and the total number of
50351 instructions and of bytes in the object code for this dispatch
50353 typedef struct dispatch_windows_s {
50354 int num_insn; /* Number of insn in the window. */
50355 int num_uops; /* Number of uops in the window. */
50356 int window_size; /* Number of bytes in the window. */
50357 int window_num; /* Window number between 0 or 1. */
50358 int num_imm; /* Number of immediates in an insn. */
50359 int num_imm_32; /* Number of 32 bit immediates in an insn. */
50360 int num_imm_64; /* Number of 64 bit immediates in an insn. */
50361 int imm_size; /* Total immediates in the window. */
50362 int num_loads; /* Total memory loads in the window. */
50363 int num_stores; /* Total memory stores in the window. */
50364 int violation; /* Violation exists in window. */
50365 sched_insn_info *window; /* Pointer to the window. */
50366 struct dispatch_windows_s *next;
50367 struct dispatch_windows_s *prev;
50368 } dispatch_windows;
50370 /* Immediate values used in an insn. */
50371 typedef struct imm_info_s
/* Heads of the window lists for window 0 and window 1.  */
50378 static dispatch_windows *dispatch_window_list;
50379 static dispatch_windows *dispatch_window_list1;
50381 /* Get dispatch group of insn, classified by its memory behavior
   (load, store, both, or none).  */
50383 static enum dispatch_group
50384 get_mem_group (rtx_insn *insn)
50386 enum attr_memory memory;
/* Unrecognized insns have no memory attribute.  */
50388 if (INSN_CODE (insn) < 0)
50389 return disp_no_group;
50390 memory = get_attr_memory (insn);
50391 if (memory == MEMORY_STORE)
50394 if (memory == MEMORY_LOAD)
50397 if (memory == MEMORY_BOTH)
50398 return disp_load_store;
50400 return disp_no_group;
50403 /* Return true if INSN is a compare instruction (test/icmp/fcmp type,
   or a COMPARE pattern).  */
50406 is_cmp (rtx_insn *insn)
50408 enum attr_type type;
50410 type = get_attr_type (insn);
50411 return (type == TYPE_TEST
50412 || type == TYPE_ICMP
50413 || type == TYPE_FCMP
50414 || GET_CODE (PATTERN (insn)) == COMPARE);
50417 /* Return true if a dispatch violation was encountered in the most
   recent window (the next window if one exists, else the current).  */
50420 dispatch_violation (void)
50422 if (dispatch_window_list->next)
50423 return dispatch_window_list->next->violation;
50424 return dispatch_window_list->violation;
50427 /* Return true if INSN is a branch instruction (a call or a jump). */
50430 is_branch (rtx insn)
50432 return (CALL_P (insn) || JUMP_P (insn));
50435 /* Return true if INSN is a prefetch instruction (a non-jump insn
   whose pattern is a PREFETCH rtx). */
50438 is_prefetch (rtx insn)
50440 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
50443 /* This function initializes a dispatch window and the list container holding a
50444 pointer to the window.  WINDOW_NUM (0 or 1) selects which of the two
   preallocated window lists to reset.  */
50447 init_window (int window_num)
50450 dispatch_windows *new_list;
50452 if (window_num == 0)
50453 new_list = dispatch_window_list;
50455 new_list = dispatch_window_list1;
/* Reset all counters and links for reuse of this window.  */
50457 new_list->num_insn = 0;
50458 new_list->num_uops = 0;
50459 new_list->window_size = 0;
50460 new_list->next = NULL;
50461 new_list->prev = NULL;
50462 new_list->window_num = window_num;
50463 new_list->num_imm = 0;
50464 new_list->num_imm_32 = 0;
50465 new_list->num_imm_64 = 0;
50466 new_list->imm_size = 0;
50467 new_list->num_loads = 0;
50468 new_list->num_stores = 0;
50469 new_list->violation = false;
/* Clear each per-insn slot in the window.  */
50471 for (i = 0; i < MAX_INSN; i++)
50473 new_list->window[i].insn = NULL;
50474 new_list->window[i].group = disp_no_group;
50475 new_list->window[i].path = no_path;
50476 new_list->window[i].byte_len = 0;
50477 new_list->window[i].imm_bytes = 0;
50482 /* This function allocates and initializes a dispatch window and the
50483 list container holding a pointer to the window.
   The window array has MAX_INSN + 1 slots.  */
50485 static dispatch_windows *
50486 allocate_window (void)
50488 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
50489 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
50494 /* This routine initializes the dispatch scheduling information. It
50495 initiates building dispatch scheduler tables and constructs the
50496 first dispatch window. */
50499 init_dispatch_sched (void)
50501 /* Allocate a dispatch list and a window. */
50502 dispatch_window_list = allocate_window ();
50503 dispatch_window_list1 = allocate_window ();
50508 /* This function returns true if a branch is detected. End of a basic block
50509 does not have to be a branch, but here we assume only branches end a
   window.  */
50513 is_end_basic_block (enum dispatch_group group)
50515 return group == disp_branch;
50518 /* This function is called when the end of a window processing is reached.
   It sanity-checks the window sizes (two windows hold at most 48 bytes).  */
50521 process_end_window (void)
50523 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
50524 if (dispatch_window_list->next)
50526 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
50527 gcc_assert (dispatch_window_list->window_size
50528 + dispatch_window_list1->window_size <= 48);
50534 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
50535 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
50536 for 48 bytes of instructions. Note that these windows are not dispatch
50537 windows that their sizes are DISPATCH_WINDOW_SIZE. */
50539 static dispatch_windows *
50540 allocate_next_window (int window_num)
50542 if (window_num == 0)
50544 if (dispatch_window_list->next)
50547 return dispatch_window_list;
/* Link window 1 after window 0 and return it.  */
50550 dispatch_window_list->next = dispatch_window_list1;
50551 dispatch_window_list1->prev = dispatch_window_list;
50553 return dispatch_window_list1;
50556 /* Compute number of immediate operands of an instruction, accumulating
   counts into IMM_VALUES by walking every sub-rtx of IN_RTX.  */
50559 find_constant (rtx in_rtx, imm_info *imm_values)
50561 if (INSN_P (in_rtx))
50562 in_rtx = PATTERN (in_rtx);
50563 subrtx_iterator::array_type array;
50564 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
50565 if (const_rtx x = *iter)
50566 switch (GET_CODE (x))
/* Integer constant: 32-bit if it fits SImode, else 64-bit.  */
50571 (imm_values->imm)++;
50572 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
50573 (imm_values->imm32)++;
50575 (imm_values->imm64)++;
/* This case counts as a 64-bit immediate.  */
50579 (imm_values->imm)++;
50580 (imm_values->imm64)++;
/* Normal label references count as 32-bit immediates.  */
50584 if (LABEL_KIND (x) == LABEL_NORMAL)
50586 (imm_values->imm)++;
50587 (imm_values->imm32)++;
50596 /* Return total size of immediate operands of an instruction along with number
50597 of corresponding immediate-operands. It initializes its parameters to zero
50598 before calling FIND_CONSTANT.
50599 INSN is the input instruction. IMM is the total of immediates.
50600 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
   bit immediates.  */
50604 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
50606 imm_info imm_values = {0, 0, 0};
50608 find_constant (insn, &imm_values);
50609 *imm = imm_values.imm;
50610 *imm32 = imm_values.imm32;
50611 *imm64 = imm_values.imm64;
/* Total immediate size in bytes: 4 per 32-bit, 8 per 64-bit.  */
50612 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
50615 /* This function indicates if an operand of an instruction is an
   immediate (returns the byte size of immediates, nonzero if any).  */
50619 has_immediate (rtx insn)
50621 int num_imm_operand;
50622 int num_imm32_operand;
50623 int num_imm64_operand;
50626 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50627 &num_imm64_operand);
50631 /* Return single or double path for instructions, based on the
   amdfam10_decode attribute. */
50633 static enum insn_path
50634 get_insn_path (rtx_insn *insn)
50636 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
50638 if ((int)path == 0)
50639 return path_single;
50641 if ((int)path == 1)
50642 return path_double;
/* NOTE(review): the fallthrough (path_multi) return is elided here.  */
50647 /* Return insn dispatch group: memory class first, then branch,
   immediate, and prefetch classifications.
   NOTE(review): some branches are elided in this excerpt.  */
50649 static enum dispatch_group
50650 get_insn_group (rtx_insn *insn)
50652 enum dispatch_group group = get_mem_group (insn);
50656 if (is_branch (insn))
50657 return disp_branch;
50662 if (has_immediate (insn))
50665 if (is_prefetch (insn))
50666 return disp_prefetch;
50668 return disp_no_group;
50671 /* Count number of GROUP restricted instructions in a dispatch
50672 window WINDOW_LIST.
   NOTE(review): the returns inside the conditionals are elided in
   this excerpt.  */
50675 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
50677 enum dispatch_group group = get_insn_group (insn);
50679 int num_imm_operand;
50680 int num_imm32_operand;
50681 int num_imm64_operand;
50683 if (group == disp_no_group)
/* Immediates: check the per-window count/size limits, counting a
   64-bit immediate as two 32-bit slots and vice versa.  */
50686 if (group == disp_imm)
50688 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50689 &num_imm64_operand);
50690 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
50691 || num_imm_operand + window_list->num_imm > MAX_IMM
50692 || (num_imm32_operand > 0
50693 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
50694 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
50695 || (num_imm64_operand > 0
50696 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
50697 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
50698 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
50699 && num_imm64_operand > 0
50700 && ((window_list->num_imm_64 > 0
50701 && window_list->num_insn >= 2)
50702 || window_list->num_insn >= 3)))
/* Memory ops: check the load/store limits for the window.  */
50708 if ((group == disp_load_store
50709 && (window_list->num_loads >= MAX_LOAD
50710 || window_list->num_stores >= MAX_STORE))
50711 || ((group == disp_load
50712 || group == disp_prefetch)
50713 && window_list->num_loads >= MAX_LOAD)
50714 || (group == disp_store
50715 && window_list->num_stores >= MAX_STORE))
50721 /* This function returns true if insn satisfies dispatch rules on the
50722 last window scheduled.
   NOTE(review): several returns/conditions are elided in this excerpt.  */
50725 fits_dispatch_window (rtx_insn *insn)
50727 dispatch_windows *window_list = dispatch_window_list;
50728 dispatch_windows *window_list_next = dispatch_window_list->next;
50729 unsigned int num_restrict;
50730 enum dispatch_group group = get_insn_group (insn);
50731 enum insn_path path = get_insn_path (insn);
50734 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
50735 instructions should be given the lowest priority in the
50736 scheduling process in Haifa scheduler to make sure they will be
50737 scheduled in the same dispatch window as the reference to them. */
50738 if (group == disp_jcc || group == disp_cmp)
50741 /* Check nonrestricted. */
50742 if (group == disp_no_group || group == disp_branch)
50745 /* Get last dispatch window. */
50746 if (window_list_next)
50747 window_list = window_list_next;
50749 if (window_list->window_num == 1)
50751 sum = window_list->prev->window_size + window_list->window_size;
50754 || (min_insn_size (insn) + sum) >= 48)
50755 /* Window 1 is full. Go for next window. */
/* Enforce the per-group allowance for this window.  */
50759 num_restrict = count_num_restricted (insn, window_list);
50761 if (num_restrict > num_allowable_groups[group])
50764 /* See if it fits in the first window. */
50765 if (window_list->window_num == 0)
50767 /* The first window should have only single and double path
50769 if (path == path_double
50770 && (window_list->num_uops + 2) > MAX_INSN)
50772 else if (path != path_single)
50778 /* Add an instruction INSN with NUM_UOPS micro-operations to the
50779 dispatch window WINDOW_LIST. */
50782 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
50784 int byte_len = min_insn_size (insn);
50785 int num_insn = window_list->num_insn;
50787 sched_insn_info *window = window_list->window;
50788 enum dispatch_group group = get_insn_group (insn);
50789 enum insn_path path = get_insn_path (insn);
50790 int num_imm_operand;
50791 int num_imm32_operand;
50792 int num_imm64_operand;
/* Record a dispatch-rule violation the first time an insn that does
   not fit is nevertheless added (disp_cmp is exempt).  */
50794 if (!window_list->violation && group != disp_cmp
50795 && !fits_dispatch_window (insn))
50796 window_list->violation = true;
50798 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50799 &num_imm64_operand);
50801 /* Initialize window with new instruction. */
50802 window[num_insn].insn = insn;
50803 window[num_insn].byte_len = byte_len;
50804 window[num_insn].group = group;
50805 window[num_insn].path = path;
50806 window[num_insn].imm_bytes = imm_size;
/* Update the window-wide running totals: byte size, insn/uop counts
   and immediate-operand statistics.  */
50808 window_list->window_size += byte_len;
50809 window_list->num_insn = num_insn + 1;
50810 window_list->num_uops = window_list->num_uops + num_uops;
50811 window_list->imm_size += imm_size;
50812 window_list->num_imm += num_imm_operand;
50813 window_list->num_imm_32 += num_imm32_operand;
50814 window_list->num_imm_64 += num_imm64_operand;
/* Track load/store pressure; prefetches count as loads, and a
   load-store insn counts once on each side.  */
50816 if (group == disp_store)
50817 window_list->num_stores += 1;
50818 else if (group == disp_load
50819 || group == disp_prefetch)
50820 window_list->num_loads += 1;
50821 else if (group == disp_load_store)
50823 window_list->num_stores += 1;
50824 window_list->num_loads += 1;
50828 /* Adds a scheduled instruction, INSN, to the current dispatch window.
50829 If the total bytes of instructions or the number of instructions in
50830 the window exceed allowable, it allocates a new window. */
/* NOTE(review): several interior lines (early returns, uop-count
   assignments, condition fragments) are elided in this excerpt.  */
50833 add_to_dispatch_window (rtx_insn *insn)
50836 dispatch_windows *window_list;
50837 dispatch_windows *next_list;
50838 dispatch_windows *window0_list;
50839 enum insn_path path;
50840 enum dispatch_group insn_group;
/* Unrecognized insns cannot be sized or classified; skip them.  */
50848 if (INSN_CODE (insn) < 0)
50851 byte_len = min_insn_size (insn);
50852 window_list = dispatch_window_list;
50853 next_list = window_list->next;
50854 path = get_insn_path (insn);
50855 insn_group = get_insn_group (insn);
50857 /* Get the last dispatch window. */
50859 window_list = dispatch_window_list->next;
/* Map the dispatch path to a uop count; the enum values of
   path_single/path_double encode the number of uops directly.  */
50861 if (path == path_single)
50863 else if (path == path_double)
50866 insn_num_uops = (int) path;
50868 /* If current window is full, get a new window.
50869 Window number zero is full, if MAX_INSN uops are scheduled in it.
50870 Window number one is full, if window zero's bytes plus window
50871 one's bytes is 32, or if the bytes of the new instruction added
50872 to the total makes it greater than 48, or it has already MAX_INSN
50873 instructions in it. */
50874 num_insn = window_list->num_insn;
50875 num_uops = window_list->num_uops;
50876 window_num = window_list->window_num;
50877 insn_fits = fits_dispatch_window (insn);
50879 if (num_insn >= MAX_INSN
50880 || num_uops + insn_num_uops > MAX_INSN
/* Toggle between window 0 and window 1 and allocate the next one.  */
50883 window_num = ~window_num & 1;
50884 window_list = allocate_next_window (window_num);
50887 if (window_num == 0)
50889 add_insn_window (insn, window_list, insn_num_uops);
/* A full window that ends in a branch terminates the dispatch
   group; process it immediately.  */
50890 if (window_list->num_insn >= MAX_INSN
50891 && insn_group == disp_branch)
50893 process_end_window ();
50897 else if (window_num == 1)
50899 window0_list = window_list->prev;
50900 sum = window0_list->window_size + window_list->window_size;
50902 || (byte_len + sum) >= 48)
50904 process_end_window ();
50905 window_list = dispatch_window_list;
50908 add_insn_window (insn, window_list, insn_num_uops);
/* window_num can only be 0 or 1.  */
50911 gcc_unreachable ();
50913 if (is_end_basic_block (insn_group))
50915 /* End of basic block is reached do end-basic-block process. */
50916 process_end_window ();
50921 /* Print the dispatch window, WINDOW_NUM, to FILE. */
50923 DEBUG_FUNCTION static void
50924 debug_dispatch_window_file (FILE *file, int window_num)
50926 dispatch_windows *list;
/* Window 0 is the head list; any other number selects list 1.  */
50929 if (window_num == 0)
50930 list = dispatch_window_list;
50932 list = dispatch_window_list1;
50934 fprintf (file, "Window #%d:\n", list->window_num);
50935 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
50936 list->num_insn, list->num_uops, list->window_size);
50937 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
50938 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
50940 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
50942 fprintf (file, " insn info:\n");
/* Dump per-slot info; empty slots (null insn) end the listing.  */
50944 for (i = 0; i < MAX_INSN; i++)
50946 if (!list->window[i].insn)
50948 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
50949 i, group_name[list->window[i].group],
50950 i, (void *)list->window[i].insn,
50951 i, list->window[i].path,
50952 i, list->window[i].byte_len,
50953 i, list->window[i].imm_bytes);
50957 /* Print to stdout a dispatch window. */
50959 DEBUG_FUNCTION void
50960 debug_dispatch_window (int window_num)
50962 debug_dispatch_window_file (stdout, window_num);
50965 /* Print INSN dispatch information to FILE. */
50967 DEBUG_FUNCTION static void
50968 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
50971 enum insn_path path;
50972 enum dispatch_group group;
50974 int num_imm_operand;
50975 int num_imm32_operand;
50976 int num_imm64_operand;
/* Nothing to report for unrecognized insns.  */
50978 if (INSN_CODE (insn) < 0)
50981 byte_len = min_insn_size (insn);
50982 path = get_insn_path (insn);
50983 group = get_insn_group (insn);
50984 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50985 &num_imm64_operand);
50987 fprintf (file, " insn info:\n");
50988 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
50989 group_name[group], path, byte_len);
50990 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
50991 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
50994 /* Print to STDERR the status of the ready list with respect to
50995 dispatch windows. */
/* NOTE(review): despite the comment above, the visible code prints to
   stdout, not stderr.  */
50997 DEBUG_FUNCTION void
50998 debug_ready_dispatch (void)
51001 int no_ready = number_in_ready ();
51003 fprintf (stdout, "Number of ready: %d\n", no_ready);
/* Dump dispatch info for each element currently on the ready list.  */
51005 for (i = 0; i < no_ready; i++)
51006 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
51009 /* This routine is the driver of the dispatch scheduler. */
51012 do_dispatch (rtx_insn *insn, int mode)
/* MODE selects the action: initialize scheduler state, or append
   INSN to the current dispatch window.  */
51014 if (mode == DISPATCH_INIT)
51015 init_dispatch_sched ();
51016 else if (mode == ADD_TO_DISPATCH_WINDOW)
51017 add_to_dispatch_window (insn);
51020 /* Return TRUE if Dispatch Scheduling is supported. */
51023 has_dispatch (rtx_insn *insn, int action)
/* Dispatch scheduling applies only to the AMD Bulldozer family
   (bdver1-4) and only when -mdispatch-scheduler is enabled.  */
51025 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
51026 && flag_dispatch_scheduler)
/* ACTION selects which query to answer about INSN.  */
51032 case IS_DISPATCH_ON:
51037 return is_cmp (insn);
51039 case DISPATCH_VIOLATION:
51040 return dispatch_violation ();
51042 case FITS_DISPATCH_WINDOW:
51043 return fits_dispatch_window (insn);
51049 /* Implementation of reassociation_width target hook used by
51050 reassoc phase to identify parallelism level in reassociated
51051 tree. Statements tree_code is passed in OPC. Arguments type
51054 Currently parallel reassociation is enabled for Atom
51055 processors only and we set reassociation width to be 2
51056 because Atom may issue up to 2 instructions per cycle.
51058 Return value should be fixed if parallel reassociation is
51059 enabled for other processors. */
51062 ix86_reassociation_width (unsigned int, machine_mode mode)
/* Vector modes get a wider reassociation width only on targets
   tuned for parallel vector execution.  */
51065 if (VECTOR_MODE_P (mode))
51067 if (TARGET_VECTOR_PARALLEL_EXECUTION)
/* Scalar modes: widen only when the tuning flags request parallel
   reassociation for integer resp. floating-point code.  */
51074 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
51076 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
51082 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
51083 place emms and femms instructions. */
/* Return the preferred SIMD vector mode for scalar MODE, picking the
   widest vector the enabled ISA supports (AVX-512 > AVX-256 > 128-bit),
   unless tuning prefers 128-bit AVX.  (Excerpt: the switch header and
   some case labels are elided.)  */
51085 static machine_mode
51086 ix86_preferred_simd_mode (machine_mode mode)
51094 return TARGET_AVX512BW ? V64QImode :
51095 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
51097 return TARGET_AVX512BW ? V32HImode :
51098 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
51100 return TARGET_AVX512F ? V16SImode :
51101 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
51103 return TARGET_AVX512F ? V8DImode :
51104 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
51107 if (TARGET_AVX512F)
51109 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
/* Double-precision vectors require SSE2 at minimum and are skipped
   entirely when double vectorization is disabled by tuning.  */
51115 if (!TARGET_VECTORIZE_DOUBLE)
51117 else if (TARGET_AVX512F)
51119 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51121 else if (TARGET_SSE2)
51130 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
51131 vectors. If AVX512F is enabled then try vectorizing with 512bit,
51132 256bit and 128bit vectors. */
/* Returns a bitmask of candidate vector sizes in bytes; 0 means
   only the default size is tried.  */
51134 static unsigned int
51135 ix86_autovectorize_vector_sizes (void)
51137 return TARGET_AVX512F ? 64 | 32 | 16 :
51138 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
51143 /* Return class of registers which could be used for pseudo of MODE
51144 and of class RCLASS for spilling instead of memory. Return NO_REGS
51145 if it is not possible or non-profitable. */
51147 ix86_spill_class (reg_class_t rclass, machine_mode mode)
/* Allow spilling integer pseudos (SImode, or DImode on 64-bit) into
   SSE registers when tuning asks for it and MMX is off -- MMX use
   would make SSE spills unsafe/unprofitable.  */
51149 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
51150 && (mode == SImode || (TARGET_64BIT && mode == DImode))
51151 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
51152 return ALL_SSE_REGS;
51156 /* Implement targetm.vectorize.init_cost. */
/* Allocate a 3-slot cost accumulator (prologue/body/epilogue),
   zero-initialized; freed by ix86_destroy_cost_data.  */
51159 ix86_init_cost (struct loop *)
51161 unsigned *cost = XNEWVEC (unsigned, 3);
51162 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
51166 /* Implement targetm.vectorize.add_stmt_cost. */
51169 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
51170 struct _stmt_vec_info *stmt_info, int misalign,
51171 enum vect_cost_model_location where)
51173 unsigned *cost = (unsigned *) data;
51174 unsigned retval = 0;
51176 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
51177 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
51179 /* Statements in an inner loop relative to the loop being
51180 vectorized are weighted more heavily. The value here is
51181 arbitrary and could potentially be improved with analysis. */
51182 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
51183 count *= 50; /* FIXME. */
51185 retval = (unsigned) (count * stmt_cost);
51187 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
51188 for Silvermont as it has out of order integer pipeline and can execute
51189 2 scalar instruction per tick, but has in order SIMD pipeline. */
51190 if (TARGET_SILVERMONT || TARGET_INTEL)
51191 if (stmt_info && stmt_info->stmt)
51193 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
/* The 1.7x penalty applies only to integer-typed results.  */
51194 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
51195 retval = (retval * 17) / 10;
/* Accumulate into the slot (prologue/body/epilogue) given by WHERE.  */
51198 cost[where] += retval;
51203 /* Implement targetm.vectorize.finish_cost. */
/* Copy the accumulated costs out of the 3-slot accumulator created by
   ix86_init_cost into the caller-provided locations.  */
51206 ix86_finish_cost (void *data, unsigned *prologue_cost,
51207 unsigned *body_cost, unsigned *epilogue_cost)
51209 unsigned *cost = (unsigned *) data;
51210 *prologue_cost = cost[vect_prologue];
51211 *body_cost = cost[vect_body];
51212 *epilogue_cost = cost[vect_epilogue];
51215 /* Implement targetm.vectorize.destroy_cost_data. */
/* Releases the accumulator allocated by ix86_init_cost (body elided
   in this excerpt).  */
51218 ix86_destroy_cost_data (void *data)
51223 /* Validate target specific memory model bits in VAL. */
51225 static unsigned HOST_WIDE_INT
51226 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
51228 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
/* Reject unknown target bits, and the contradictory combination of
   both HLE_ACQUIRE and HLE_RELEASE; fall back to seq-cst.  */
51231 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
51233 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
51235 warning (OPT_Winvalid_memory_model,
51236 "Unknown architecture specific memory model");
51237 return MEMMODEL_SEQ_CST;
/* HLE prefixes require a sufficiently strong base model: acquire (or
   stronger) for XACQUIRE, release (or stronger) for XRELEASE.  */
51239 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
51240 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
51242 warning (OPT_Winvalid_memory_model,
51243 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
51244 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
51246 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
51248 warning (OPT_Winvalid_memory_model,
51249 "HLE_RELEASE not used with RELEASE or stronger memory model");
51250 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
51255 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
51256 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
51257 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
51258 or number of vecsize_mangle variants that should be emitted. */
/* NOTE(review): switch-case labels and some returns are elided in
   this excerpt.  */
51261 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
51262 struct cgraph_simd_clone *clonei,
51263 tree base_type, int num)
/* An explicit simdlen must be a power of two in [2, 16].  */
51267 if (clonei->simdlen
51268 && (clonei->simdlen < 2
51269 || clonei->simdlen > 16
51270 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
51272 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51273 "unsupported simdlen %d", clonei->simdlen);
/* Only a limited set of scalar return modes is supported (the case
   labels are elided here); anything else warns and bails out.  */
51277 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
51278 if (TREE_CODE (ret_type) != VOID_TYPE)
51279 switch (TYPE_MODE (ret_type))
51291 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51292 "unsupported return type %qT for simd\n", ret_type);
/* Likewise every argument mode must be supported.  */
51299 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
51300 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
51301 switch (TYPE_MODE (TREE_TYPE (t)))
51313 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51314 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
/* Choose the ISA mangling letter: 'b' = SSE2-class, 'c' = AVX,
   'd' = AVX2; Cilk elemental defaults to 'b'.  */
51318 if (clonei->cilk_elemental)
51320 /* Parse here processor clause. If not present, default to 'b'. */
51321 clonei->vecsize_mangle = 'b';
51323 else if (!TREE_PUBLIC (node->decl))
51325 /* If the function isn't exported, we can pick up just one ISA
51328 clonei->vecsize_mangle = 'd';
51329 else if (TARGET_AVX)
51330 clonei->vecsize_mangle = 'c';
51332 clonei->vecsize_mangle = 'b';
/* Exported functions emit all three variants; NUM selects which.  */
51337 clonei->vecsize_mangle = "bcd"[num];
51340 switch (clonei->vecsize_mangle)
51343 clonei->vecsize_int = 128;
51344 clonei->vecsize_float = 128;
51347 clonei->vecsize_int = 128;
51348 clonei->vecsize_float = 256;
51351 clonei->vecsize_int = 256;
51352 clonei->vecsize_float = 256;
/* Derive a default simdlen from the vector width and the base type's
   bit size, clamped to 16 lanes.  */
51355 if (clonei->simdlen == 0)
51357 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
51358 clonei->simdlen = clonei->vecsize_int;
51360 clonei->simdlen = clonei->vecsize_float;
51361 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
51362 if (clonei->simdlen > 16)
51363 clonei->simdlen = 16;
51368 /* Add target attribute to SIMD clone NODE if needed. */
/* NOTE(review): the switch cases assigning STR per mangle letter are
   elided in this excerpt.  */
51371 ix86_simd_clone_adjust (struct cgraph_node *node)
51373 const char *str = NULL;
51374 gcc_assert (node->decl == cfun->decl);
51375 switch (node->simdclone->vecsize_mangle)
51390 gcc_unreachable ();
/* Apply the chosen target attribute string to the clone's decl and
   re-establish it as the current function so the new target flags
   take effect.  */
51395 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
51396 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
51399 ix86_reset_previous_fndecl ();
51400 ix86_set_current_function (node->decl);
51403 /* If SIMD clone NODE can't be used in a vectorized loop
51404 in current function, return -1, otherwise return a badness of using it
51405 (0 if it is most desirable from vecsize_mangle point of view, 1
51406 slightly less desirable, etc.). */
/* NOTE(review): the case labels and the -1 returns for unusable
   variants are elided in this excerpt.  */
51409 ix86_simd_clone_usable (struct cgraph_node *node)
51411 switch (node->simdclone->vecsize_mangle)
/* Badness grows as the clone's ISA falls further below what the
   current function's target (e.g. AVX2) could exploit.  */
51418 return TARGET_AVX2 ? 2 : 1;
51422 return TARGET_AVX2 ? 1 : 0;
51429 gcc_unreachable ();
51433 /* This function adjusts the unroll factor based on
51434 the hardware capabilities. For ex, bdver3 has
51435 a loop buffer which makes unrolling of smaller
51436 loops less important. This function decides the
51437 unroll factor using number of memory references
51438 (value 32 is used) as a heuristic. */
/* NOTE(review): the MEM_P test and counter increment inside the
   subrtx walk are elided in this excerpt.  */
51441 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
51446 unsigned mem_count = 0;
/* Heuristic applies only on targets tuned with ADJUST_UNROLL
   (bdver3/bdver4); otherwise keep NUNROLL unchanged.  */
51448 if (!TARGET_ADJUST_UNROLL)
51451 /* Count the number of memory references within the loop body.
51452 This value determines the unrolling factor for bdver3 and bdver4
51454 subrtx_iterator::array_type array;
51455 bbs = get_loop_body (loop);
51456 for (i = 0; i < loop->num_nodes; i++)
51457 FOR_BB_INSNS (bbs[i], insn)
51458 if (NONDEBUG_INSN_P (insn))
51459 FOR_EACH_SUBRTX (iter, array, insn, NONCONST)
51460 if (const_rtx x = *iter)
51463 machine_mode mode = GET_MODE (x);
51464 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
/* Cap the unroll factor so that memory references times the factor
   stays around 32.  */
51472 if (mem_count && mem_count <=32)
51473 return 32/mem_count;
51479 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
51482 ix86_float_exceptions_rounding_supported_p (void)
51484 /* For x87 floating point with standard excess precision handling,
51485 there is no adddf3 pattern (since x87 floating point only has
51486 XFmode operations) so the default hook implementation gets this
51488 return TARGET_80387 || TARGET_SSE_MATH;
51491 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
/* Builds the three tree sequences (*HOLD, *CLEAR, *UPDATE) used to
   implement C11 atomic compound assignment with correct FP-exception
   semantics, covering the x87 and/or SSE environments as enabled.  */
51494 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
/* Neither x87 nor SSE math: nothing to save or restore.  */
51496 if (!TARGET_80387 && !TARGET_SSE_MATH)
51498 tree exceptions_var = create_tmp_var (integer_type_node);
/* x87 path: save the 28-byte FP environment (7 words) with fnstenv,
   clear exceptions with fnclex, and later read the status word with
   fnstsw before restoring via fldenv.  */
51501 tree fenv_index_type = build_index_type (size_int (6));
51502 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
51503 tree fenv_var = create_tmp_var (fenv_type);
51504 mark_addressable (fenv_var);
51505 tree fenv_ptr = build_pointer_type (fenv_type);
51506 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
51507 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
51508 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
51509 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
51510 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
51511 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
51512 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
51513 tree hold_fnclex = build_call_expr (fnclex, 0);
51514 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
51516 *clear = build_call_expr (fnclex, 0);
/* UPDATE: capture raised x87 exceptions from the status word into
   exceptions_var, then restore the saved environment.  */
51517 tree sw_var = create_tmp_var (short_unsigned_type_node);
51518 tree fnstsw_call = build_call_expr (fnstsw, 0);
51519 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
51520 sw_var, fnstsw_call);
51521 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
51522 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
51523 exceptions_var, exceptions_x87);
51524 *update = build2 (COMPOUND_EXPR, integer_type_node,
51525 sw_mod, update_mod);
51526 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
51527 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
/* SSE path: save MXCSR, then mask all exceptions (set bits 0x1f80)
   and clear the sticky exception flags (mask 0xffffffc0) before
   reloading it.  */
51529 if (TARGET_SSE_MATH)
51531 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node);
51532 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node);
51533 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
51534 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
51535 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
51536 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
51537 mxcsr_orig_var, stmxcsr_hold_call);
51538 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
51540 build_int_cst (unsigned_type_node, 0x1f80));
51541 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
51542 build_int_cst (unsigned_type_node, 0xffffffc0));
51543 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
51544 mxcsr_mod_var, hold_mod_val);
51545 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51546 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
51547 hold_assign_orig, hold_assign_mod);
51548 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
51549 ldmxcsr_hold_call);
/* Chain onto any x87 sequence already built above.  */
51551 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
51554 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51556 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
51557 ldmxcsr_clear_call);
51559 *clear = ldmxcsr_clear_call;
/* UPDATE: read MXCSR to collect SSE exceptions, OR them into any x87
   exceptions already captured, then restore the original MXCSR.  */
51560 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
51561 tree exceptions_sse = fold_convert (integer_type_node,
51562 stxmcsr_update_call);
51565 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
51566 exceptions_var, exceptions_sse);
51567 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
51568 exceptions_var, exceptions_mod);
51569 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
51570 exceptions_assign);
51573 *update = build2 (MODIFY_EXPR, integer_type_node,
51574 exceptions_var, exceptions_sse);
51575 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
51576 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51577 ldmxcsr_update_call);
/* Finally re-raise the collected exceptions atomically.  */
51579 tree atomic_feraiseexcept
51580 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
51581 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
51582 1, exceptions_var);
51583 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51584 atomic_feraiseexcept_call);
51587 /* Return mode to be used for bounds or VOIDmode
51588 if bounds are not supported. */
/* NOTE(review): the return statements are elided in this excerpt;
   only the diagnostic for the unsupported case is visible.  */
51590 static enum machine_mode
51591 ix86_mpx_bound_mode ()
51593 /* Do not support pointer checker if MPX
51597 if (flag_check_pointer_bounds)
51598 warning (0, "Pointer Checker requires MPX support on this target."
51599 " Use -mmpx options to enable MPX.");
51606 /* Return constant used to statically initialize constant bounds.
51608 This function is used to create special bound values. For now
51609 only INIT bounds and NONE bounds are expected. More special
51610 values may be added later. */
51613 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
/* Bounds are encoded as a complex pair; the lower bound is stored
   as-is (0 -> all-zeros, nonzero -> all-ones) and the upper bound is
   stored one's-complemented (0 -> all-ones, nonzero -> all-zeros).  */
51615 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
51616 : build_zero_cst (pointer_sized_int_node);
51617 tree high = ub ? build_zero_cst (pointer_sized_int_node)
51618 : build_minus_one_cst (pointer_sized_int_node);
51620 /* This function is supposed to be used to create INIT and
51621 NONE bounds only. */
51622 gcc_assert ((lb == 0 && ub == -1)
51623 || (lb == -1 && ub == 0));
51625 return build_complex (NULL, low, high);
51628 /* Generate a list of statements STMTS to initialize pointer bounds
51629 variable VAR with bounds LB and UB. Return the number of generated
/* Writes LB into the first pointer-sized word of VAR and the
   complemented UB into the second, appending both assignments to
   STMTS.  */
51633 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
51635 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
51636 tree lhs, modify, var_p;
/* The upper bound is stored one's-complemented (matches the encoding
   used by ix86_make_bounds_constant).  */
51638 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
51639 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
51641 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
51642 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
51643 append_to_statement_list (modify, stmts);
/* Second word: *(var_p + sizeof (ptr-sized int)) = ~ub.  */
51645 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
51646 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
51647 TYPE_SIZE_UNIT (pointer_sized_int_node)));
51648 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
51649 append_to_statement_list (modify, stmts);
51654 /* Initialize the GCC target structure. */
51655 #undef TARGET_RETURN_IN_MEMORY
51656 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
51658 #undef TARGET_LEGITIMIZE_ADDRESS
51659 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
51661 #undef TARGET_ATTRIBUTE_TABLE
51662 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
51663 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
51664 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
51665 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51666 # undef TARGET_MERGE_DECL_ATTRIBUTES
51667 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
51670 #undef TARGET_COMP_TYPE_ATTRIBUTES
51671 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
51673 #undef TARGET_INIT_BUILTINS
51674 #define TARGET_INIT_BUILTINS ix86_init_builtins
51675 #undef TARGET_BUILTIN_DECL
51676 #define TARGET_BUILTIN_DECL ix86_builtin_decl
51677 #undef TARGET_EXPAND_BUILTIN
51678 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
51680 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
51681 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
51682 ix86_builtin_vectorized_function
51684 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
51685 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
51687 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
51688 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
51690 #undef TARGET_VECTORIZE_BUILTIN_GATHER
51691 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
51693 #undef TARGET_BUILTIN_RECIPROCAL
51694 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
51696 #undef TARGET_ASM_FUNCTION_EPILOGUE
51697 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
51699 #undef TARGET_ENCODE_SECTION_INFO
51700 #ifndef SUBTARGET_ENCODE_SECTION_INFO
51701 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
51703 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
51706 #undef TARGET_ASM_OPEN_PAREN
51707 #define TARGET_ASM_OPEN_PAREN ""
51708 #undef TARGET_ASM_CLOSE_PAREN
51709 #define TARGET_ASM_CLOSE_PAREN ""
51711 #undef TARGET_ASM_BYTE_OP
51712 #define TARGET_ASM_BYTE_OP ASM_BYTE
51714 #undef TARGET_ASM_ALIGNED_HI_OP
51715 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
51716 #undef TARGET_ASM_ALIGNED_SI_OP
51717 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
51719 #undef TARGET_ASM_ALIGNED_DI_OP
51720 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
51723 #undef TARGET_PROFILE_BEFORE_PROLOGUE
51724 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
51726 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
51727 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
51729 #undef TARGET_ASM_UNALIGNED_HI_OP
51730 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
51731 #undef TARGET_ASM_UNALIGNED_SI_OP
51732 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
51733 #undef TARGET_ASM_UNALIGNED_DI_OP
51734 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
51736 #undef TARGET_PRINT_OPERAND
51737 #define TARGET_PRINT_OPERAND ix86_print_operand
51738 #undef TARGET_PRINT_OPERAND_ADDRESS
51739 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
51740 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
51741 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
51742 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
51743 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
51745 #undef TARGET_SCHED_INIT_GLOBAL
51746 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
51747 #undef TARGET_SCHED_ADJUST_COST
51748 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
51749 #undef TARGET_SCHED_ISSUE_RATE
51750 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
51751 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
51752 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
51753 ia32_multipass_dfa_lookahead
51754 #undef TARGET_SCHED_MACRO_FUSION_P
51755 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
51756 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
51757 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
51759 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
51760 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
51762 #undef TARGET_MEMMODEL_CHECK
51763 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
51765 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
51766 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
51769 #undef TARGET_HAVE_TLS
51770 #define TARGET_HAVE_TLS true
51772 #undef TARGET_CANNOT_FORCE_CONST_MEM
51773 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
51774 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
51775 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
51777 #undef TARGET_DELEGITIMIZE_ADDRESS
51778 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
51780 #undef TARGET_MS_BITFIELD_LAYOUT_P
51781 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
51784 #undef TARGET_BINDS_LOCAL_P
51785 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
51787 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51788 #undef TARGET_BINDS_LOCAL_P
51789 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
51792 #undef TARGET_ASM_OUTPUT_MI_THUNK
51793 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
51794 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
51795 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
51797 #undef TARGET_ASM_FILE_START
51798 #define TARGET_ASM_FILE_START x86_file_start
51800 #undef TARGET_OPTION_OVERRIDE
51801 #define TARGET_OPTION_OVERRIDE ix86_option_override
51803 #undef TARGET_REGISTER_MOVE_COST
51804 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
51805 #undef TARGET_MEMORY_MOVE_COST
51806 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
51807 #undef TARGET_RTX_COSTS
51808 #define TARGET_RTX_COSTS ix86_rtx_costs
51809 #undef TARGET_ADDRESS_COST
51810 #define TARGET_ADDRESS_COST ix86_address_cost
51812 #undef TARGET_FIXED_CONDITION_CODE_REGS
51813 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
51814 #undef TARGET_CC_MODES_COMPATIBLE
51815 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
51817 #undef TARGET_MACHINE_DEPENDENT_REORG
51818 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
51820 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
51821 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
51823 #undef TARGET_BUILD_BUILTIN_VA_LIST
51824 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
51826 #undef TARGET_FOLD_BUILTIN
51827 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
51829 #undef TARGET_COMPARE_VERSION_PRIORITY
51830 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
51832 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
51833 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
51834 ix86_generate_version_dispatcher_body
51836 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
51837 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
51838 ix86_get_function_versions_dispatcher
51840 #undef TARGET_ENUM_VA_LIST_P
51841 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
51843 #undef TARGET_FN_ABI_VA_LIST
51844 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
51846 #undef TARGET_CANONICAL_VA_LIST_TYPE
51847 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
51849 #undef TARGET_EXPAND_BUILTIN_VA_START
51850 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
51852 #undef TARGET_MD_ASM_CLOBBERS
51853 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
/* Argument passing, varargs, stack layout and calling-convention hooks.  */
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG ix86_init_pic_reg
#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
/* Combine validation, ASan, va_arg gimplification and mode-support hooks.  */
#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  ix86_libgcc_floating_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
/* Emit DTP-relative (TLS) DWARF relocations.  NOTE(review): upstream
   wraps this hook in #ifdef HAVE_AS_TLS, since the output routine is only
   meaningful when the assembler supports TLS relocations; the guard lines
   appear to have been dropped here and are restored — confirm against the
   original tree.  */
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif
/* Let subtargets add their own attributes to decls.  The #ifdef is opened
   here but its matching #endif was missing, leaving the conditional
   unbalanced; restore it so the preprocessor state stays consistent.  */
#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
/* Name mangling, stack protector, function-value and reload-class hooks.  */
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
/* Auto-vectorizer cost model and vector-mode selection hooks.  */
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
/* Per-function target option ("target" attribute / multiversioning) hooks.  */
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
/* Addressing, register allocation (LRA) and prologue/epilogue hooks.  */
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
/* NOTE(review): darwin_rename_builtins is a Darwin-only routine; upstream
   guards this hook with #if TARGET_MACHO, and referencing it unconditionally
   would break non-Darwin builds.  The guard lines appear to have been
   dropped and are restored — confirm against the original tree.  */
#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif
/* Loop unrolling, spill class, SIMD-clone and mode-switching hooks.  */
#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
  ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
  ix86_simd_clone_usable

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p

/* Mode-switching (optimize_mode_switching) hooks.  */
#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority
/* Pointer Bounds Checker (MPX/chkp) and offloading hooks.  */
#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_LOAD_BOUNDS_FOR_ARG
#define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds

#undef TARGET_STORE_BOUNDS_FOR_ARG
#define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds

#undef TARGET_LOAD_RETURNED_BOUNDS
#define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds

#undef TARGET_STORE_RETURNED_BOUNDS
#define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds

#undef TARGET_CHKP_BOUND_MODE
#define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode

#undef TARGET_BUILTIN_CHKP_FUNCTION
#define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function

#undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
#define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds

#undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
#define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant

#undef TARGET_CHKP_INITIALIZE_BOUNDS
#define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds

#undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
#define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS \
  ix86_offload_options
52113 struct gcc_target targetm = TARGET_INITIALIZER;
52115 #include "gt-i386.h"