1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
28 #include "double-int.h"
35 #include "fold-const.h"
36 #include "stringpool.h"
39 #include "stor-layout.h"
43 #include "hard-reg-set.h"
44 #include "insn-config.h"
45 #include "conditions.h"
47 #include "insn-codes.h"
48 #include "insn-attr.h"
56 #include "diagnostic-core.h"
59 #include "dominance.h"
65 #include "cfgcleanup.h"
66 #include "basic-block.h"
69 #include "target-def.h"
70 #include "common/common-target.h"
71 #include "langhooks.h"
75 #include "plugin-api.h"
78 #include "hash-table.h"
79 #include "tree-ssa-alias.h"
80 #include "internal-fn.h"
81 #include "gimple-fold.h"
83 #include "gimple-expr.h"
89 #include "tm-constrs.h"
93 #include "sched-int.h"
97 #include "diagnostic.h"
99 #include "tree-pass.h"
100 #include "wide-int.h"
102 #include "pass_manager.h"
103 #include "target-globals.h"
104 #include "tree-vectorizer.h"
105 #include "shrink-wrap.h"
106 #include "builtins.h"
107 #include "rtl-iter.h"
108 #include "tree-iterator.h"
109 #include "tree-chkp.h"
110 #include "rtl-chkp.h"
/* Forward declarations: static helpers (defined later in this file) that
   rewrite dllimport / PE-COFF symbol references into RTL addresses that
   are legitimate on Windows targets.  */
112 static rtx legitimize_dllimport_symbol (rtx, bool);
113 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
114 static rtx legitimize_pe_coff_symbol (rtx, bool);
/* Default stack-limit checking threshold; -1 presumably means "no limit
   enforced" -- confirm against the target headers that may predefine it.
   The matching #endif was lost in this extraction; restored here so the
   conditional is balanced.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  The cost
   tables below have five entries: QI, HI, SI, DI and "other", so index 4
   is the catch-all.  The final arm (": 4)") was lost in this extraction
   and is restored here; without it the macro ends in a dangling
   line-continuation.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
128 /* Processor costs (relative to an add) */
129 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
130 #define COSTS_N_BYTES(N) ((N) * 2)
/* Placeholder stringop strategy used to fill array slots that are never
   consulted (e.g. the 64-bit entry for 32-bit-only processors): a single
   unconditional libcall.  */
132 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
/* memcpy/memset strategies when optimizing for size: always emit the
   one-byte rep-prefixed string instruction, which is the smallest code.
   The two array entries appear to be the 32-bit and 64-bit variants --
   confirm against stringop_algs usage elsewhere in the back end.  */
134 static stringop_algs ix86_size_memcpy[2] = {
135 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
136 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
137 static stringop_algs ix86_size_memset[2] = {
138 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
139 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
/* Cost table used when tuning for code size rather than speed; insn
   costs are byte counts via COSTS_N_BYTES.  NOTE(review): this extract
   is missing several initializer lines and the closing brace -- compare
   against the complete file before modifying any field.  */
142 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
143 COSTS_N_BYTES (2), /* cost of an add instruction */
144 COSTS_N_BYTES (3), /* cost of a lea instruction */
145 COSTS_N_BYTES (2), /* variable shift costs */
146 COSTS_N_BYTES (3), /* constant shift costs */
147 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
148 COSTS_N_BYTES (3), /* HI */
149 COSTS_N_BYTES (3), /* SI */
150 COSTS_N_BYTES (3), /* DI */
151 COSTS_N_BYTES (5)}, /* other */
152 0, /* cost of multiply per each bit set */
153 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
154 COSTS_N_BYTES (3), /* HI */
155 COSTS_N_BYTES (3), /* SI */
156 COSTS_N_BYTES (3), /* DI */
157 COSTS_N_BYTES (5)}, /* other */
158 COSTS_N_BYTES (3), /* cost of movsx */
159 COSTS_N_BYTES (3), /* cost of movzx */
160 0, /* "large" insn */
162 2, /* cost for loading QImode using movzbl */
163 {2, 2, 2}, /* cost of loading integer registers
164 in QImode, HImode and SImode.
165 Relative to reg-reg move (2). */
166 {2, 2, 2}, /* cost of storing integer registers */
167 2, /* cost of reg,reg fld/fst */
168 {2, 2, 2}, /* cost of loading fp registers
169 in SFmode, DFmode and XFmode */
170 {2, 2, 2}, /* cost of storing fp registers
171 in SFmode, DFmode and XFmode */
172 3, /* cost of moving MMX register */
173 {3, 3}, /* cost of loading MMX registers
174 in SImode and DImode */
175 {3, 3}, /* cost of storing MMX registers
176 in SImode and DImode */
177 3, /* cost of moving SSE register */
178 {3, 3, 3}, /* cost of loading SSE registers
179 in SImode, DImode and TImode */
180 {3, 3, 3}, /* cost of storing SSE registers
181 in SImode, DImode and TImode */
182 3, /* MMX or SSE register to integer */
183 0, /* size of l1 cache */
184 0, /* size of l2 cache */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
188 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
189 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
190 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
191 COSTS_N_BYTES (2), /* cost of FABS instruction. */
192 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
193 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
196 1, /* scalar_stmt_cost. */
197 1, /* scalar load_cost. */
198 1, /* scalar_store_cost. */
199 1, /* vec_stmt_cost. */
200 1, /* vec_to_scalar_cost. */
201 1, /* scalar_to_vec_cost. */
202 1, /* vec_align_load_cost. */
203 1, /* vec_unalign_load_cost. */
204 1, /* vec_store_cost. */
205 1, /* cond_taken_branch_cost. */
206 1, /* cond_not_taken_branch_cost. */
209 /* Processor costs (relative to an add) */
/* i386 stringop strategies: one-byte rep prefix up to any size.  The
   second entry is a dummy -- presumably the 64-bit slot, which a 386
   can never use.  */
210 static stringop_algs i386_memcpy[2] = {
211 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
212 DUMMY_STRINGOP_ALGS};
213 static stringop_algs i386_memset[2] = {
214 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
215 DUMMY_STRINGOP_ALGS};
/* Tuning table for the original Intel 386.  NOTE(review): this extract
   is missing some initializer lines and the closing brace -- compare
   against the complete file before modifying any field.  */
218 struct processor_costs i386_cost = { /* 386 specific costs */
219 COSTS_N_INSNS (1), /* cost of an add instruction */
220 COSTS_N_INSNS (1), /* cost of a lea instruction */
221 COSTS_N_INSNS (3), /* variable shift costs */
222 COSTS_N_INSNS (2), /* constant shift costs */
223 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
224 COSTS_N_INSNS (6), /* HI */
225 COSTS_N_INSNS (6), /* SI */
226 COSTS_N_INSNS (6), /* DI */
227 COSTS_N_INSNS (6)}, /* other */
228 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
229 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
230 COSTS_N_INSNS (23), /* HI */
231 COSTS_N_INSNS (23), /* SI */
232 COSTS_N_INSNS (23), /* DI */
233 COSTS_N_INSNS (23)}, /* other */
234 COSTS_N_INSNS (3), /* cost of movsx */
235 COSTS_N_INSNS (2), /* cost of movzx */
236 15, /* "large" insn */
238 4, /* cost for loading QImode using movzbl */
239 {2, 4, 2}, /* cost of loading integer registers
240 in QImode, HImode and SImode.
241 Relative to reg-reg move (2). */
242 {2, 4, 2}, /* cost of storing integer registers */
243 2, /* cost of reg,reg fld/fst */
244 {8, 8, 8}, /* cost of loading fp registers
245 in SFmode, DFmode and XFmode */
246 {8, 8, 8}, /* cost of storing fp registers
247 in SFmode, DFmode and XFmode */
248 2, /* cost of moving MMX register */
249 {4, 8}, /* cost of loading MMX registers
250 in SImode and DImode */
251 {4, 8}, /* cost of storing MMX registers
252 in SImode and DImode */
253 2, /* cost of moving SSE register */
254 {4, 8, 16}, /* cost of loading SSE registers
255 in SImode, DImode and TImode */
256 {4, 8, 16}, /* cost of storing SSE registers
257 in SImode, DImode and TImode */
258 3, /* MMX or SSE register to integer */
259 0, /* size of l1 cache */
260 0, /* size of l2 cache */
261 0, /* size of prefetch block */
262 0, /* number of parallel prefetches */
264 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
265 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
266 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
267 COSTS_N_INSNS (22), /* cost of FABS instruction. */
268 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
269 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
272 1, /* scalar_stmt_cost. */
273 1, /* scalar load_cost. */
274 1, /* scalar_store_cost. */
275 1, /* vec_stmt_cost. */
276 1, /* vec_to_scalar_cost. */
277 1, /* scalar_to_vec_cost. */
278 1, /* vec_align_load_cost. */
279 2, /* vec_unalign_load_cost. */
280 1, /* vec_store_cost. */
281 3, /* cond_taken_branch_cost. */
282 1, /* cond_not_taken_branch_cost. */
/* i486 stringop strategies: four-byte rep prefix for all sizes; the
   64-bit slot is a dummy.  */
285 static stringop_algs i486_memcpy[2] = {
286 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
287 DUMMY_STRINGOP_ALGS};
288 static stringop_algs i486_memset[2] = {
289 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
290 DUMMY_STRINGOP_ALGS};
/* Tuning table for the Intel 486.  NOTE(review): this extract is missing
   some initializer lines and the closing brace -- compare against the
   complete file before modifying any field.  */
293 struct processor_costs i486_cost = { /* 486 specific costs */
294 COSTS_N_INSNS (1), /* cost of an add instruction */
295 COSTS_N_INSNS (1), /* cost of a lea instruction */
296 COSTS_N_INSNS (3), /* variable shift costs */
297 COSTS_N_INSNS (2), /* constant shift costs */
298 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
299 COSTS_N_INSNS (12), /* HI */
300 COSTS_N_INSNS (12), /* SI */
301 COSTS_N_INSNS (12), /* DI */
302 COSTS_N_INSNS (12)}, /* other */
303 1, /* cost of multiply per each bit set */
304 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
305 COSTS_N_INSNS (40), /* HI */
306 COSTS_N_INSNS (40), /* SI */
307 COSTS_N_INSNS (40), /* DI */
308 COSTS_N_INSNS (40)}, /* other */
309 COSTS_N_INSNS (3), /* cost of movsx */
310 COSTS_N_INSNS (2), /* cost of movzx */
311 15, /* "large" insn */
313 4, /* cost for loading QImode using movzbl */
314 {2, 4, 2}, /* cost of loading integer registers
315 in QImode, HImode and SImode.
316 Relative to reg-reg move (2). */
317 {2, 4, 2}, /* cost of storing integer registers */
318 2, /* cost of reg,reg fld/fst */
319 {8, 8, 8}, /* cost of loading fp registers
320 in SFmode, DFmode and XFmode */
321 {8, 8, 8}, /* cost of storing fp registers
322 in SFmode, DFmode and XFmode */
323 2, /* cost of moving MMX register */
324 {4, 8}, /* cost of loading MMX registers
325 in SImode and DImode */
326 {4, 8}, /* cost of storing MMX registers
327 in SImode and DImode */
328 2, /* cost of moving SSE register */
329 {4, 8, 16}, /* cost of loading SSE registers
330 in SImode, DImode and TImode */
331 {4, 8, 16}, /* cost of storing SSE registers
332 in SImode, DImode and TImode */
333 3, /* MMX or SSE register to integer */
334 4, /* size of l1 cache. 486 has 8kB cache
335 shared for code and data, so 4kB is
336 not really precise. */
337 4, /* size of l2 cache */
338 0, /* size of prefetch block */
339 0, /* number of parallel prefetches */
341 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
342 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
343 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
344 COSTS_N_INSNS (3), /* cost of FABS instruction. */
345 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
346 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
349 1, /* scalar_stmt_cost. */
350 1, /* scalar load_cost. */
351 1, /* scalar_store_cost. */
352 1, /* vec_stmt_cost. */
353 1, /* vec_to_scalar_cost. */
354 1, /* scalar_to_vec_cost. */
355 1, /* vec_align_load_cost. */
356 2, /* vec_unalign_load_cost. */
357 1, /* vec_store_cost. */
358 3, /* cond_taken_branch_cost. */
359 1, /* cond_not_taken_branch_cost. */
/* Pentium stringop strategies: rep movsl for blocks up to 256 bytes for
   memcpy (libcall beyond), rep stosl for all memset sizes; the 64-bit
   slot is a dummy.  */
362 static stringop_algs pentium_memcpy[2] = {
363 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
364 DUMMY_STRINGOP_ALGS};
365 static stringop_algs pentium_memset[2] = {
366 {libcall, {{-1, rep_prefix_4_byte, false}}},
367 DUMMY_STRINGOP_ALGS};
/* Tuning table for the Intel Pentium.  NOTE(review): this extract is
   missing some initializer lines and the closing brace -- compare
   against the complete file before modifying any field.  */
370 struct processor_costs pentium_cost = {
371 COSTS_N_INSNS (1), /* cost of an add instruction */
372 COSTS_N_INSNS (1), /* cost of a lea instruction */
373 COSTS_N_INSNS (4), /* variable shift costs */
374 COSTS_N_INSNS (1), /* constant shift costs */
375 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
376 COSTS_N_INSNS (11), /* HI */
377 COSTS_N_INSNS (11), /* SI */
378 COSTS_N_INSNS (11), /* DI */
379 COSTS_N_INSNS (11)}, /* other */
380 0, /* cost of multiply per each bit set */
381 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
382 COSTS_N_INSNS (25), /* HI */
383 COSTS_N_INSNS (25), /* SI */
384 COSTS_N_INSNS (25), /* DI */
385 COSTS_N_INSNS (25)}, /* other */
386 COSTS_N_INSNS (3), /* cost of movsx */
387 COSTS_N_INSNS (2), /* cost of movzx */
388 8, /* "large" insn */
390 6, /* cost for loading QImode using movzbl */
391 {2, 4, 2}, /* cost of loading integer registers
392 in QImode, HImode and SImode.
393 Relative to reg-reg move (2). */
394 {2, 4, 2}, /* cost of storing integer registers */
395 2, /* cost of reg,reg fld/fst */
396 {2, 2, 6}, /* cost of loading fp registers
397 in SFmode, DFmode and XFmode */
398 {4, 4, 6}, /* cost of storing fp registers
399 in SFmode, DFmode and XFmode */
400 8, /* cost of moving MMX register */
401 {8, 8}, /* cost of loading MMX registers
402 in SImode and DImode */
403 {8, 8}, /* cost of storing MMX registers
404 in SImode and DImode */
405 2, /* cost of moving SSE register */
406 {4, 8, 16}, /* cost of loading SSE registers
407 in SImode, DImode and TImode */
408 {4, 8, 16}, /* cost of storing SSE registers
409 in SImode, DImode and TImode */
410 3, /* MMX or SSE register to integer */
411 8, /* size of l1 cache. */
412 8, /* size of l2 cache */
413 0, /* size of prefetch block */
414 0, /* number of parallel prefetches */
416 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
417 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
418 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
419 COSTS_N_INSNS (1), /* cost of FABS instruction. */
420 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
421 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
424 1, /* scalar_stmt_cost. */
425 1, /* scalar load_cost. */
426 1, /* scalar_store_cost. */
427 1, /* vec_stmt_cost. */
428 1, /* vec_to_scalar_cost. */
429 1, /* scalar_to_vec_cost. */
430 1, /* vec_align_load_cost. */
431 2, /* vec_unalign_load_cost. */
432 1, /* vec_store_cost. */
433 3, /* cond_taken_branch_cost. */
434 1, /* cond_not_taken_branch_cost. */
437 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
438 (we ensure the alignment). For small blocks inline loop is still a
439 noticeable win, for bigger blocks either rep movsl or rep movsb is
440 way to go. Rep movsb has apparently more expensive startup time in CPU,
441 but after 4K the difference is down in the noise. */
/* Size thresholds below pick: inline loop (<=128), unrolled loop
   (<=1024), rep movsl (<=8192), then rep movsb (memcpy) or libcall
   (memset).  The 64-bit slot is a dummy.  */
442 static stringop_algs pentiumpro_memcpy[2] = {
443 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
444 {8192, rep_prefix_4_byte, false},
445 {-1, rep_prefix_1_byte, false}}},
446 DUMMY_STRINGOP_ALGS};
447 static stringop_algs pentiumpro_memset[2] = {
448 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
449 {8192, rep_prefix_4_byte, false},
450 {-1, libcall, false}}},
451 DUMMY_STRINGOP_ALGS};
/* Tuning table for the Intel PentiumPro / P6 family.  NOTE(review): this
   extract is missing some initializer lines and the closing brace --
   compare against the complete file before modifying any field.  */
453 struct processor_costs pentiumpro_cost = {
454 COSTS_N_INSNS (1), /* cost of an add instruction */
455 COSTS_N_INSNS (1), /* cost of a lea instruction */
456 COSTS_N_INSNS (1), /* variable shift costs */
457 COSTS_N_INSNS (1), /* constant shift costs */
458 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
459 COSTS_N_INSNS (4), /* HI */
460 COSTS_N_INSNS (4), /* SI */
461 COSTS_N_INSNS (4), /* DI */
462 COSTS_N_INSNS (4)}, /* other */
463 0, /* cost of multiply per each bit set */
464 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
465 COSTS_N_INSNS (17), /* HI */
466 COSTS_N_INSNS (17), /* SI */
467 COSTS_N_INSNS (17), /* DI */
468 COSTS_N_INSNS (17)}, /* other */
469 COSTS_N_INSNS (1), /* cost of movsx */
470 COSTS_N_INSNS (1), /* cost of movzx */
471 8, /* "large" insn */
473 2, /* cost for loading QImode using movzbl */
474 {4, 4, 4}, /* cost of loading integer registers
475 in QImode, HImode and SImode.
476 Relative to reg-reg move (2). */
477 {2, 2, 2}, /* cost of storing integer registers */
478 2, /* cost of reg,reg fld/fst */
479 {2, 2, 6}, /* cost of loading fp registers
480 in SFmode, DFmode and XFmode */
481 {4, 4, 6}, /* cost of storing fp registers
482 in SFmode, DFmode and XFmode */
483 2, /* cost of moving MMX register */
484 {2, 2}, /* cost of loading MMX registers
485 in SImode and DImode */
486 {2, 2}, /* cost of storing MMX registers
487 in SImode and DImode */
488 2, /* cost of moving SSE register */
489 {2, 2, 8}, /* cost of loading SSE registers
490 in SImode, DImode and TImode */
491 {2, 2, 8}, /* cost of storing SSE registers
492 in SImode, DImode and TImode */
493 3, /* MMX or SSE register to integer */
494 8, /* size of l1 cache. */
495 256, /* size of l2 cache */
496 32, /* size of prefetch block */
497 6, /* number of parallel prefetches */
499 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
500 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
501 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
502 COSTS_N_INSNS (2), /* cost of FABS instruction. */
503 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
504 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
507 1, /* scalar_stmt_cost. */
508 1, /* scalar load_cost. */
509 1, /* scalar_store_cost. */
510 1, /* vec_stmt_cost. */
511 1, /* vec_to_scalar_cost. */
512 1, /* scalar_to_vec_cost. */
513 1, /* vec_align_load_cost. */
514 2, /* vec_unalign_load_cost. */
515 1, /* vec_store_cost. */
516 3, /* cond_taken_branch_cost. */
517 1, /* cond_not_taken_branch_cost. */
/* Geode stringop strategies: rep movsl/stosl up to 256 bytes, libcall
   beyond; the 64-bit slot is a dummy.  */
520 static stringop_algs geode_memcpy[2] = {
521 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
522 DUMMY_STRINGOP_ALGS};
523 static stringop_algs geode_memset[2] = {
524 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
525 DUMMY_STRINGOP_ALGS};
/* Tuning table for the AMD Geode.  NOTE(review): this extract is missing
   some initializer lines and the closing brace -- compare against the
   complete file before modifying any field.  */
527 struct processor_costs geode_cost = {
528 COSTS_N_INSNS (1), /* cost of an add instruction */
529 COSTS_N_INSNS (1), /* cost of a lea instruction */
530 COSTS_N_INSNS (2), /* variable shift costs */
531 COSTS_N_INSNS (1), /* constant shift costs */
532 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
533 COSTS_N_INSNS (4), /* HI */
534 COSTS_N_INSNS (7), /* SI */
535 COSTS_N_INSNS (7), /* DI */
536 COSTS_N_INSNS (7)}, /* other */
537 0, /* cost of multiply per each bit set */
538 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
539 COSTS_N_INSNS (23), /* HI */
540 COSTS_N_INSNS (39), /* SI */
541 COSTS_N_INSNS (39), /* DI */
542 COSTS_N_INSNS (39)}, /* other */
543 COSTS_N_INSNS (1), /* cost of movsx */
544 COSTS_N_INSNS (1), /* cost of movzx */
545 8, /* "large" insn */
547 1, /* cost for loading QImode using movzbl */
548 {1, 1, 1}, /* cost of loading integer registers
549 in QImode, HImode and SImode.
550 Relative to reg-reg move (2). */
551 {1, 1, 1}, /* cost of storing integer registers */
552 1, /* cost of reg,reg fld/fst */
553 {1, 1, 1}, /* cost of loading fp registers
554 in SFmode, DFmode and XFmode */
555 {4, 6, 6}, /* cost of storing fp registers
556 in SFmode, DFmode and XFmode */
558 1, /* cost of moving MMX register */
559 {1, 1}, /* cost of loading MMX registers
560 in SImode and DImode */
561 {1, 1}, /* cost of storing MMX registers
562 in SImode and DImode */
563 1, /* cost of moving SSE register */
564 {1, 1, 1}, /* cost of loading SSE registers
565 in SImode, DImode and TImode */
566 {1, 1, 1}, /* cost of storing SSE registers
567 in SImode, DImode and TImode */
568 1, /* MMX or SSE register to integer */
569 64, /* size of l1 cache. */
570 128, /* size of l2 cache. */
571 32, /* size of prefetch block */
572 1, /* number of parallel prefetches */
574 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
575 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
576 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
577 COSTS_N_INSNS (1), /* cost of FABS instruction. */
578 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
579 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
582 1, /* scalar_stmt_cost. */
583 1, /* scalar load_cost. */
584 1, /* scalar_store_cost. */
585 1, /* vec_stmt_cost. */
586 1, /* vec_to_scalar_cost. */
587 1, /* scalar_to_vec_cost. */
588 1, /* vec_align_load_cost. */
589 2, /* vec_unalign_load_cost. */
590 1, /* vec_store_cost. */
591 3, /* cond_taken_branch_cost. */
592 1, /* cond_not_taken_branch_cost. */
/* K6 stringop strategies: rep movsl/stosl up to 256 bytes, libcall
   beyond; the 64-bit slot is a dummy.  */
595 static stringop_algs k6_memcpy[2] = {
596 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
597 DUMMY_STRINGOP_ALGS};
598 static stringop_algs k6_memset[2] = {
599 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
600 DUMMY_STRINGOP_ALGS};
/* Tuning table for the AMD K6.  NOTE(review): this extract is missing
   some initializer lines and the closing brace -- compare against the
   complete file before modifying any field.  */
602 struct processor_costs k6_cost = {
603 COSTS_N_INSNS (1), /* cost of an add instruction */
604 COSTS_N_INSNS (2), /* cost of a lea instruction */
605 COSTS_N_INSNS (1), /* variable shift costs */
606 COSTS_N_INSNS (1), /* constant shift costs */
607 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
608 COSTS_N_INSNS (3), /* HI */
609 COSTS_N_INSNS (3), /* SI */
610 COSTS_N_INSNS (3), /* DI */
611 COSTS_N_INSNS (3)}, /* other */
612 0, /* cost of multiply per each bit set */
613 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
614 COSTS_N_INSNS (18), /* HI */
615 COSTS_N_INSNS (18), /* SI */
616 COSTS_N_INSNS (18), /* DI */
617 COSTS_N_INSNS (18)}, /* other */
618 COSTS_N_INSNS (2), /* cost of movsx */
619 COSTS_N_INSNS (2), /* cost of movzx */
620 8, /* "large" insn */
622 3, /* cost for loading QImode using movzbl */
623 {4, 5, 4}, /* cost of loading integer registers
624 in QImode, HImode and SImode.
625 Relative to reg-reg move (2). */
626 {2, 3, 2}, /* cost of storing integer registers */
627 4, /* cost of reg,reg fld/fst */
628 {6, 6, 6}, /* cost of loading fp registers
629 in SFmode, DFmode and XFmode */
630 {4, 4, 4}, /* cost of storing fp registers
631 in SFmode, DFmode and XFmode */
632 2, /* cost of moving MMX register */
633 {2, 2}, /* cost of loading MMX registers
634 in SImode and DImode */
635 {2, 2}, /* cost of storing MMX registers
636 in SImode and DImode */
637 2, /* cost of moving SSE register */
638 {2, 2, 8}, /* cost of loading SSE registers
639 in SImode, DImode and TImode */
640 {2, 2, 8}, /* cost of storing SSE registers
641 in SImode, DImode and TImode */
642 6, /* MMX or SSE register to integer */
643 32, /* size of l1 cache. */
644 32, /* size of l2 cache. Some models
645 have integrated l2 cache, but
646 optimizing for k6 is not important
647 enough to worry about that. */
648 32, /* size of prefetch block */
649 1, /* number of parallel prefetches */
651 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
652 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
653 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
654 COSTS_N_INSNS (2), /* cost of FABS instruction. */
655 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
656 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
659 1, /* scalar_stmt_cost. */
660 1, /* scalar load_cost. */
661 1, /* scalar_store_cost. */
662 1, /* vec_stmt_cost. */
663 1, /* vec_to_scalar_cost. */
664 1, /* scalar_to_vec_cost. */
665 1, /* vec_align_load_cost. */
666 2, /* vec_unalign_load_cost. */
667 1, /* vec_store_cost. */
668 3, /* cond_taken_branch_cost. */
669 1, /* cond_not_taken_branch_cost. */
672 /* For some reason, Athlon deals better with REP prefix (relative to loops)
673 compared to K8. Alignment becomes important after 8 bytes for memcpy and
674 128 bytes for memset. */
/* Hence: rep movsl/stosl up to 2048 bytes, libcall beyond; the 64-bit
   slot is a dummy (Athlon has no 64-bit mode).  */
675 static stringop_algs athlon_memcpy[2] = {
676 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
677 DUMMY_STRINGOP_ALGS};
678 static stringop_algs athlon_memset[2] = {
679 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
680 DUMMY_STRINGOP_ALGS};
/* Tuning table for the AMD Athlon.  NOTE(review): this extract is
   missing some initializer lines and the closing brace -- compare
   against the complete file before modifying any field.  */
682 struct processor_costs athlon_cost = {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (2), /* cost of a lea instruction */
685 COSTS_N_INSNS (1), /* variable shift costs */
686 COSTS_N_INSNS (1), /* constant shift costs */
687 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (5), /* HI */
689 COSTS_N_INSNS (5), /* SI */
690 COSTS_N_INSNS (5), /* DI */
691 COSTS_N_INSNS (5)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (26), /* HI */
695 COSTS_N_INSNS (42), /* SI */
696 COSTS_N_INSNS (74), /* DI */
697 COSTS_N_INSNS (74)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 8, /* "large" insn */
702 4, /* cost for loading QImode using movzbl */
703 {3, 4, 3}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {3, 4, 3}, /* cost of storing integer registers */
707 4, /* cost of reg,reg fld/fst */
708 {4, 4, 12}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {6, 6, 8}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {4, 4}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {4, 4}, /* cost of storing MMX registers
716 in SImode and DImode */
717 2, /* cost of moving SSE register */
718 {4, 4, 6}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {4, 4, 5}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 5, /* MMX or SSE register to integer */
723 64, /* size of l1 cache. */
724 256, /* size of l2 cache. */
725 64, /* size of prefetch block */
726 6, /* number of parallel prefetches */
728 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
729 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
730 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
731 COSTS_N_INSNS (2), /* cost of FABS instruction. */
732 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
733 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
736 1, /* scalar_stmt_cost. */
737 1, /* scalar load_cost. */
738 1, /* scalar_store_cost. */
739 1, /* vec_stmt_cost. */
740 1, /* vec_to_scalar_cost. */
741 1, /* scalar_to_vec_cost. */
742 1, /* vec_align_load_cost. */
743 2, /* vec_unalign_load_cost. */
744 1, /* vec_store_cost. */
745 3, /* cond_taken_branch_cost. */
746 1, /* cond_not_taken_branch_cost. */
749 /* K8 has optimized REP instruction for medium sized blocks, but for very
750 small blocks it is better to use loop. For large blocks, libcall can
751 do nontemporary accesses and beat inline considerably. */
/* First entry: 32-bit strategy; second: 64-bit strategy (uses
   rep_prefix_8_byte).  */
752 static stringop_algs k8_memcpy[2] = {
753 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
754 {-1, rep_prefix_4_byte, false}}},
755 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
756 {-1, libcall, false}}}};
757 static stringop_algs k8_memset[2] = {
758 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
759 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
760 {libcall, {{48, unrolled_loop, false},
761 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Tuning table for the AMD K8 (Opteron/Athlon 64).  NOTE(review): this
   extract is missing some initializer lines and the closing brace --
   compare against the complete file before modifying any field.  */
763 struct processor_costs k8_cost = {
764 COSTS_N_INSNS (1), /* cost of an add instruction */
765 COSTS_N_INSNS (2), /* cost of a lea instruction */
766 COSTS_N_INSNS (1), /* variable shift costs */
767 COSTS_N_INSNS (1), /* constant shift costs */
768 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
769 COSTS_N_INSNS (4), /* HI */
770 COSTS_N_INSNS (3), /* SI */
771 COSTS_N_INSNS (4), /* DI */
772 COSTS_N_INSNS (5)}, /* other */
773 0, /* cost of multiply per each bit set */
774 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
775 COSTS_N_INSNS (26), /* HI */
776 COSTS_N_INSNS (42), /* SI */
777 COSTS_N_INSNS (74), /* DI */
778 COSTS_N_INSNS (74)}, /* other */
779 COSTS_N_INSNS (1), /* cost of movsx */
780 COSTS_N_INSNS (1), /* cost of movzx */
781 8, /* "large" insn */
783 4, /* cost for loading QImode using movzbl */
784 {3, 4, 3}, /* cost of loading integer registers
785 in QImode, HImode and SImode.
786 Relative to reg-reg move (2). */
787 {3, 4, 3}, /* cost of storing integer registers */
788 4, /* cost of reg,reg fld/fst */
789 {4, 4, 12}, /* cost of loading fp registers
790 in SFmode, DFmode and XFmode */
791 {6, 6, 8}, /* cost of storing fp registers
792 in SFmode, DFmode and XFmode */
793 2, /* cost of moving MMX register */
794 {3, 3}, /* cost of loading MMX registers
795 in SImode and DImode */
796 {4, 4}, /* cost of storing MMX registers
797 in SImode and DImode */
798 2, /* cost of moving SSE register */
799 {4, 3, 6}, /* cost of loading SSE registers
800 in SImode, DImode and TImode */
801 {4, 4, 5}, /* cost of storing SSE registers
802 in SImode, DImode and TImode */
803 5, /* MMX or SSE register to integer */
804 64, /* size of l1 cache. */
805 512, /* size of l2 cache. */
806 64, /* size of prefetch block */
807 /* New AMD processors never drop prefetches; if they cannot be performed
808 immediately, they are queued. We set number of simultaneous prefetches
809 to a large constant to reflect this (it probably is not a good idea not
810 to limit number of prefetches at all, as their execution also takes some
   time).  */
812 100, /* number of parallel prefetches */
814 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
815 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
816 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
817 COSTS_N_INSNS (2), /* cost of FABS instruction. */
818 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
819 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
823 4, /* scalar_stmt_cost. */
824 2, /* scalar load_cost. */
825 2, /* scalar_store_cost. */
826 5, /* vec_stmt_cost. */
827 0, /* vec_to_scalar_cost. */
828 2, /* scalar_to_vec_cost. */
829 2, /* vec_align_load_cost. */
830 3, /* vec_unalign_load_cost. */
831 3, /* vec_store_cost. */
832 3, /* cond_taken_branch_cost. */
833 2, /* cond_not_taken_branch_cost. */
836 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
837 very small blocks it is better to use loop. For large blocks, libcall can
838 do nontemporary accesses and beat inline considerably. */
/* First entry: 32-bit strategy; second: 64-bit strategy (uses
   rep_prefix_8_byte).  */
839 static stringop_algs amdfam10_memcpy[2] = {
840 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
841 {-1, rep_prefix_4_byte, false}}},
842 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
843 {-1, libcall, false}}}};
844 static stringop_algs amdfam10_memset[2] = {
845 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
846 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
847 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
848 {-1, libcall, false}}}};
/* Tuning table for AMD Family 10h (Barcelona).  NOTE(review): this
   extract is missing some initializer lines and the closing brace --
   compare against the complete file before modifying any field.  */
849 struct processor_costs amdfam10_cost = {
850 COSTS_N_INSNS (1), /* cost of an add instruction */
851 COSTS_N_INSNS (2), /* cost of a lea instruction */
852 COSTS_N_INSNS (1), /* variable shift costs */
853 COSTS_N_INSNS (1), /* constant shift costs */
854 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
855 COSTS_N_INSNS (4), /* HI */
856 COSTS_N_INSNS (3), /* SI */
857 COSTS_N_INSNS (4), /* DI */
858 COSTS_N_INSNS (5)}, /* other */
859 0, /* cost of multiply per each bit set */
860 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
861 COSTS_N_INSNS (35), /* HI */
862 COSTS_N_INSNS (51), /* SI */
863 COSTS_N_INSNS (83), /* DI */
864 COSTS_N_INSNS (83)}, /* other */
865 COSTS_N_INSNS (1), /* cost of movsx */
866 COSTS_N_INSNS (1), /* cost of movzx */
867 8, /* "large" insn */
869 4, /* cost for loading QImode using movzbl */
870 {3, 4, 3}, /* cost of loading integer registers
871 in QImode, HImode and SImode.
872 Relative to reg-reg move (2). */
873 {3, 4, 3}, /* cost of storing integer registers */
874 4, /* cost of reg,reg fld/fst */
875 {4, 4, 12}, /* cost of loading fp registers
876 in SFmode, DFmode and XFmode */
877 {6, 6, 8}, /* cost of storing fp registers
878 in SFmode, DFmode and XFmode */
879 2, /* cost of moving MMX register */
880 {3, 3}, /* cost of loading MMX registers
881 in SImode and DImode */
882 {4, 4}, /* cost of storing MMX registers
883 in SImode and DImode */
884 2, /* cost of moving SSE register */
885 {4, 4, 3}, /* cost of loading SSE registers
886 in SImode, DImode and TImode */
887 {4, 4, 5}, /* cost of storing SSE registers
888 in SImode, DImode and TImode */
889 3, /* MMX or SSE register to integer */
/* NOTE(review): the next four lines are the remains of a latency table
   whose comment delimiters were lost in this extraction; re-wrapped as a
   comment here so they do not parse as code.
891 MOVD reg64, xmmreg Double FSTORE 4
892 MOVD reg32, xmmreg Double FSTORE 4
894 MOVD reg64, xmmreg Double FADD 3
896 MOVD reg32, xmmreg Double FADD 3
   */
898 64, /* size of l1 cache. */
899 512, /* size of l2 cache. */
900 64, /* size of prefetch block */
901 /* New AMD processors never drop prefetches; if they cannot be performed
902 immediately, they are queued. We set number of simultaneous prefetches
903 to a large constant to reflect this (it probably is not a good idea not
904 to limit number of prefetches at all, as their execution also takes some
   time).  */
906 100, /* number of parallel prefetches */
908 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
909 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
910 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
911 COSTS_N_INSNS (2), /* cost of FABS instruction. */
912 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
913 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
917 4, /* scalar_stmt_cost. */
918 2, /* scalar load_cost. */
919 2, /* scalar_store_cost. */
920 6, /* vec_stmt_cost. */
921 0, /* vec_to_scalar_cost. */
922 2, /* scalar_to_vec_cost. */
923 2, /* vec_align_load_cost. */
924 2, /* vec_unalign_load_cost. */
925 2, /* vec_store_cost. */
926 2, /* cond_taken_branch_cost. */
927 1, /* cond_not_taken_branch_cost. */
930 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
931 very small blocks it is better to use loop. For large blocks, libcall
932 can do nontemporary accesses and beat inline considerably. */
933 static stringop_algs bdver1_memcpy[2] = {
934 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
935 {-1, rep_prefix_4_byte, false}}},
936 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
937 {-1, libcall, false}}}};
938 static stringop_algs bdver1_memset[2] = {
939 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
940 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
941 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
942 {-1, libcall, false}}}};
944 const struct processor_costs bdver1_cost = {
945 COSTS_N_INSNS (1), /* cost of an add instruction */
946 COSTS_N_INSNS (1), /* cost of a lea instruction */
947 COSTS_N_INSNS (1), /* variable shift costs */
948 COSTS_N_INSNS (1), /* constant shift costs */
949 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
950 COSTS_N_INSNS (4), /* HI */
951 COSTS_N_INSNS (4), /* SI */
952 COSTS_N_INSNS (6), /* DI */
953 COSTS_N_INSNS (6)}, /* other */
954 0, /* cost of multiply per each bit set */
955 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
956 COSTS_N_INSNS (35), /* HI */
957 COSTS_N_INSNS (51), /* SI */
958 COSTS_N_INSNS (83), /* DI */
959 COSTS_N_INSNS (83)}, /* other */
960 COSTS_N_INSNS (1), /* cost of movsx */
961 COSTS_N_INSNS (1), /* cost of movzx */
962 8, /* "large" insn */
964 4, /* cost for loading QImode using movzbl */
965 {5, 5, 4}, /* cost of loading integer registers
966 in QImode, HImode and SImode.
967 Relative to reg-reg move (2). */
968 {4, 4, 4}, /* cost of storing integer registers */
969 2, /* cost of reg,reg fld/fst */
970 {5, 5, 12}, /* cost of loading fp registers
971 in SFmode, DFmode and XFmode */
972 {4, 4, 8}, /* cost of storing fp registers
973 in SFmode, DFmode and XFmode */
974 2, /* cost of moving MMX register */
975 {4, 4}, /* cost of loading MMX registers
976 in SImode and DImode */
977 {4, 4}, /* cost of storing MMX registers
978 in SImode and DImode */
979 2, /* cost of moving SSE register */
980 {4, 4, 4}, /* cost of loading SSE registers
981 in SImode, DImode and TImode */
982 {4, 4, 4}, /* cost of storing SSE registers
983 in SImode, DImode and TImode */
984 2, /* MMX or SSE register to integer */
986 MOVD reg64, xmmreg Double FSTORE 4
987 MOVD reg32, xmmreg Double FSTORE 4
989 MOVD reg64, xmmreg Double FADD 3
991 MOVD reg32, xmmreg Double FADD 3
993 16, /* size of l1 cache. */
994 2048, /* size of l2 cache. */
995 64, /* size of prefetch block */
996 /* New AMD processors never drop prefetches; if they cannot be performed
997 immediately, they are queued. We set number of simultaneous prefetches
998 to a large constant to reflect this (it probably is not a good idea not
999 to limit number of prefetches at all, as their execution also takes some
1001 100, /* number of parallel prefetches */
1002 2, /* Branch cost */
1003 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1004 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1005 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1006 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1007 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1008 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1012 6, /* scalar_stmt_cost. */
1013 4, /* scalar load_cost. */
1014 4, /* scalar_store_cost. */
1015 6, /* vec_stmt_cost. */
1016 0, /* vec_to_scalar_cost. */
1017 2, /* scalar_to_vec_cost. */
1018 4, /* vec_align_load_cost. */
1019 4, /* vec_unalign_load_cost. */
1020 4, /* vec_store_cost. */
1021 2, /* cond_taken_branch_cost. */
1022 1, /* cond_not_taken_branch_cost. */
1025 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
1026 very small blocks it is better to use loop. For large blocks, libcall
1027 can do nontemporary accesses and beat inline considerably. */
1029 static stringop_algs bdver2_memcpy[2] = {
1030 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1031 {-1, rep_prefix_4_byte, false}}},
1032 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1033 {-1, libcall, false}}}};
1034 static stringop_algs bdver2_memset[2] = {
1035 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1036 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1037 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1038 {-1, libcall, false}}}};
1040 const struct processor_costs bdver2_cost = {
1041 COSTS_N_INSNS (1), /* cost of an add instruction */
1042 COSTS_N_INSNS (1), /* cost of a lea instruction */
1043 COSTS_N_INSNS (1), /* variable shift costs */
1044 COSTS_N_INSNS (1), /* constant shift costs */
1045 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1046 COSTS_N_INSNS (4), /* HI */
1047 COSTS_N_INSNS (4), /* SI */
1048 COSTS_N_INSNS (6), /* DI */
1049 COSTS_N_INSNS (6)}, /* other */
1050 0, /* cost of multiply per each bit set */
1051 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1052 COSTS_N_INSNS (35), /* HI */
1053 COSTS_N_INSNS (51), /* SI */
1054 COSTS_N_INSNS (83), /* DI */
1055 COSTS_N_INSNS (83)}, /* other */
1056 COSTS_N_INSNS (1), /* cost of movsx */
1057 COSTS_N_INSNS (1), /* cost of movzx */
1058 8, /* "large" insn */
1060 4, /* cost for loading QImode using movzbl */
1061 {5, 5, 4}, /* cost of loading integer registers
1062 in QImode, HImode and SImode.
1063 Relative to reg-reg move (2). */
1064 {4, 4, 4}, /* cost of storing integer registers */
1065 2, /* cost of reg,reg fld/fst */
1066 {5, 5, 12}, /* cost of loading fp registers
1067 in SFmode, DFmode and XFmode */
1068 {4, 4, 8}, /* cost of storing fp registers
1069 in SFmode, DFmode and XFmode */
1070 2, /* cost of moving MMX register */
1071 {4, 4}, /* cost of loading MMX registers
1072 in SImode and DImode */
1073 {4, 4}, /* cost of storing MMX registers
1074 in SImode and DImode */
1075 2, /* cost of moving SSE register */
1076 {4, 4, 4}, /* cost of loading SSE registers
1077 in SImode, DImode and TImode */
1078 {4, 4, 4}, /* cost of storing SSE registers
1079 in SImode, DImode and TImode */
1080 2, /* MMX or SSE register to integer */
1082 MOVD reg64, xmmreg Double FSTORE 4
1083 MOVD reg32, xmmreg Double FSTORE 4
1085 MOVD reg64, xmmreg Double FADD 3
1087 MOVD reg32, xmmreg Double FADD 3
1089 16, /* size of l1 cache. */
1090 2048, /* size of l2 cache. */
1091 64, /* size of prefetch block */
1092 /* New AMD processors never drop prefetches; if they cannot be performed
1093 immediately, they are queued. We set number of simultaneous prefetches
1094 to a large constant to reflect this (it probably is not a good idea not
1095 to limit number of prefetches at all, as their execution also takes some
1097 100, /* number of parallel prefetches */
1098 2, /* Branch cost */
1099 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1100 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1101 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1102 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1103 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1104 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1108 6, /* scalar_stmt_cost. */
1109 4, /* scalar load_cost. */
1110 4, /* scalar_store_cost. */
1111 6, /* vec_stmt_cost. */
1112 0, /* vec_to_scalar_cost. */
1113 2, /* scalar_to_vec_cost. */
1114 4, /* vec_align_load_cost. */
1115 4, /* vec_unalign_load_cost. */
1116 4, /* vec_store_cost. */
1117 2, /* cond_taken_branch_cost. */
1118 1, /* cond_not_taken_branch_cost. */
1122 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1123 very small blocks it is better to use loop. For large blocks, libcall
1124 can do nontemporary accesses and beat inline considerably. */
1125 static stringop_algs bdver3_memcpy[2] = {
1126 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1127 {-1, rep_prefix_4_byte, false}}},
1128 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1129 {-1, libcall, false}}}};
1130 static stringop_algs bdver3_memset[2] = {
1131 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1132 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1133 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1134 {-1, libcall, false}}}};
1135 struct processor_costs bdver3_cost = {
1136 COSTS_N_INSNS (1), /* cost of an add instruction */
1137 COSTS_N_INSNS (1), /* cost of a lea instruction */
1138 COSTS_N_INSNS (1), /* variable shift costs */
1139 COSTS_N_INSNS (1), /* constant shift costs */
1140 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1141 COSTS_N_INSNS (4), /* HI */
1142 COSTS_N_INSNS (4), /* SI */
1143 COSTS_N_INSNS (6), /* DI */
1144 COSTS_N_INSNS (6)}, /* other */
1145 0, /* cost of multiply per each bit set */
1146 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1147 COSTS_N_INSNS (35), /* HI */
1148 COSTS_N_INSNS (51), /* SI */
1149 COSTS_N_INSNS (83), /* DI */
1150 COSTS_N_INSNS (83)}, /* other */
1151 COSTS_N_INSNS (1), /* cost of movsx */
1152 COSTS_N_INSNS (1), /* cost of movzx */
1153 8, /* "large" insn */
1155 4, /* cost for loading QImode using movzbl */
1156 {5, 5, 4}, /* cost of loading integer registers
1157 in QImode, HImode and SImode.
1158 Relative to reg-reg move (2). */
1159 {4, 4, 4}, /* cost of storing integer registers */
1160 2, /* cost of reg,reg fld/fst */
1161 {5, 5, 12}, /* cost of loading fp registers
1162 in SFmode, DFmode and XFmode */
1163 {4, 4, 8}, /* cost of storing fp registers
1164 in SFmode, DFmode and XFmode */
1165 2, /* cost of moving MMX register */
1166 {4, 4}, /* cost of loading MMX registers
1167 in SImode and DImode */
1168 {4, 4}, /* cost of storing MMX registers
1169 in SImode and DImode */
1170 2, /* cost of moving SSE register */
1171 {4, 4, 4}, /* cost of loading SSE registers
1172 in SImode, DImode and TImode */
1173 {4, 4, 4}, /* cost of storing SSE registers
1174 in SImode, DImode and TImode */
1175 2, /* MMX or SSE register to integer */
1176 16, /* size of l1 cache. */
1177 2048, /* size of l2 cache. */
1178 64, /* size of prefetch block */
1179 /* New AMD processors never drop prefetches; if they cannot be performed
1180 immediately, they are queued. We set number of simultaneous prefetches
1181 to a large constant to reflect this (it probably is not a good idea not
1182 to limit number of prefetches at all, as their execution also takes some
1184 100, /* number of parallel prefetches */
1185 2, /* Branch cost */
1186 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1187 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1188 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1189 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1190 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1191 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1195 6, /* scalar_stmt_cost. */
1196 4, /* scalar load_cost. */
1197 4, /* scalar_store_cost. */
1198 6, /* vec_stmt_cost. */
1199 0, /* vec_to_scalar_cost. */
1200 2, /* scalar_to_vec_cost. */
1201 4, /* vec_align_load_cost. */
1202 4, /* vec_unalign_load_cost. */
1203 4, /* vec_store_cost. */
1204 2, /* cond_taken_branch_cost. */
1205 1, /* cond_not_taken_branch_cost. */
1208 /* BDVER4 has optimized REP instruction for medium sized blocks, but for
1209 very small blocks it is better to use loop. For large blocks, libcall
1210 can do nontemporary accesses and beat inline considerably. */
1211 static stringop_algs bdver4_memcpy[2] = {
1212 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1213 {-1, rep_prefix_4_byte, false}}},
1214 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1215 {-1, libcall, false}}}};
1216 static stringop_algs bdver4_memset[2] = {
1217 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1218 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1219 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1220 {-1, libcall, false}}}};
1221 struct processor_costs bdver4_cost = {
1222 COSTS_N_INSNS (1), /* cost of an add instruction */
1223 COSTS_N_INSNS (1), /* cost of a lea instruction */
1224 COSTS_N_INSNS (1), /* variable shift costs */
1225 COSTS_N_INSNS (1), /* constant shift costs */
1226 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1227 COSTS_N_INSNS (4), /* HI */
1228 COSTS_N_INSNS (4), /* SI */
1229 COSTS_N_INSNS (6), /* DI */
1230 COSTS_N_INSNS (6)}, /* other */
1231 0, /* cost of multiply per each bit set */
1232 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1233 COSTS_N_INSNS (35), /* HI */
1234 COSTS_N_INSNS (51), /* SI */
1235 COSTS_N_INSNS (83), /* DI */
1236 COSTS_N_INSNS (83)}, /* other */
1237 COSTS_N_INSNS (1), /* cost of movsx */
1238 COSTS_N_INSNS (1), /* cost of movzx */
1239 8, /* "large" insn */
1241 4, /* cost for loading QImode using movzbl */
1242 {5, 5, 4}, /* cost of loading integer registers
1243 in QImode, HImode and SImode.
1244 Relative to reg-reg move (2). */
1245 {4, 4, 4}, /* cost of storing integer registers */
1246 2, /* cost of reg,reg fld/fst */
1247 {5, 5, 12}, /* cost of loading fp registers
1248 in SFmode, DFmode and XFmode */
1249 {4, 4, 8}, /* cost of storing fp registers
1250 in SFmode, DFmode and XFmode */
1251 2, /* cost of moving MMX register */
1252 {4, 4}, /* cost of loading MMX registers
1253 in SImode and DImode */
1254 {4, 4}, /* cost of storing MMX registers
1255 in SImode and DImode */
1256 2, /* cost of moving SSE register */
1257 {4, 4, 4}, /* cost of loading SSE registers
1258 in SImode, DImode and TImode */
1259 {4, 4, 4}, /* cost of storing SSE registers
1260 in SImode, DImode and TImode */
1261 2, /* MMX or SSE register to integer */
1262 16, /* size of l1 cache. */
1263 2048, /* size of l2 cache. */
1264 64, /* size of prefetch block */
1265 /* New AMD processors never drop prefetches; if they cannot be performed
1266 immediately, they are queued. We set number of simultaneous prefetches
1267 to a large constant to reflect this (it probably is not a good idea not
1268 to limit number of prefetches at all, as their execution also takes some
1270 100, /* number of parallel prefetches */
1271 2, /* Branch cost */
1272 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1273 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1274 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1275 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1276 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1277 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1281 6, /* scalar_stmt_cost. */
1282 4, /* scalar load_cost. */
1283 4, /* scalar_store_cost. */
1284 6, /* vec_stmt_cost. */
1285 0, /* vec_to_scalar_cost. */
1286 2, /* scalar_to_vec_cost. */
1287 4, /* vec_align_load_cost. */
1288 4, /* vec_unalign_load_cost. */
1289 4, /* vec_store_cost. */
1290 2, /* cond_taken_branch_cost. */
1291 1, /* cond_not_taken_branch_cost. */
1294 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1295 very small blocks it is better to use loop. For large blocks, libcall can
1296 do nontemporary accesses and beat inline considerably. */
1297 static stringop_algs btver1_memcpy[2] = {
1298 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1299 {-1, rep_prefix_4_byte, false}}},
1300 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1301 {-1, libcall, false}}}};
1302 static stringop_algs btver1_memset[2] = {
1303 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1304 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1305 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1306 {-1, libcall, false}}}};
1307 const struct processor_costs btver1_cost = {
1308 COSTS_N_INSNS (1), /* cost of an add instruction */
1309 COSTS_N_INSNS (2), /* cost of a lea instruction */
1310 COSTS_N_INSNS (1), /* variable shift costs */
1311 COSTS_N_INSNS (1), /* constant shift costs */
1312 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1313 COSTS_N_INSNS (4), /* HI */
1314 COSTS_N_INSNS (3), /* SI */
1315 COSTS_N_INSNS (4), /* DI */
1316 COSTS_N_INSNS (5)}, /* other */
1317 0, /* cost of multiply per each bit set */
1318 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1319 COSTS_N_INSNS (35), /* HI */
1320 COSTS_N_INSNS (51), /* SI */
1321 COSTS_N_INSNS (83), /* DI */
1322 COSTS_N_INSNS (83)}, /* other */
1323 COSTS_N_INSNS (1), /* cost of movsx */
1324 COSTS_N_INSNS (1), /* cost of movzx */
1325 8, /* "large" insn */
1327 4, /* cost for loading QImode using movzbl */
1328 {3, 4, 3}, /* cost of loading integer registers
1329 in QImode, HImode and SImode.
1330 Relative to reg-reg move (2). */
1331 {3, 4, 3}, /* cost of storing integer registers */
1332 4, /* cost of reg,reg fld/fst */
1333 {4, 4, 12}, /* cost of loading fp registers
1334 in SFmode, DFmode and XFmode */
1335 {6, 6, 8}, /* cost of storing fp registers
1336 in SFmode, DFmode and XFmode */
1337 2, /* cost of moving MMX register */
1338 {3, 3}, /* cost of loading MMX registers
1339 in SImode and DImode */
1340 {4, 4}, /* cost of storing MMX registers
1341 in SImode and DImode */
1342 2, /* cost of moving SSE register */
1343 {4, 4, 3}, /* cost of loading SSE registers
1344 in SImode, DImode and TImode */
1345 {4, 4, 5}, /* cost of storing SSE registers
1346 in SImode, DImode and TImode */
1347 3, /* MMX or SSE register to integer */
1349 MOVD reg64, xmmreg Double FSTORE 4
1350 MOVD reg32, xmmreg Double FSTORE 4
1352 MOVD reg64, xmmreg Double FADD 3
1354 MOVD reg32, xmmreg Double FADD 3
1356 32, /* size of l1 cache. */
1357 512, /* size of l2 cache. */
1358 64, /* size of prefetch block */
1359 100, /* number of parallel prefetches */
1360 2, /* Branch cost */
1361 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1362 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1363 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1364 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1365 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1366 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1370 4, /* scalar_stmt_cost. */
1371 2, /* scalar load_cost. */
1372 2, /* scalar_store_cost. */
1373 6, /* vec_stmt_cost. */
1374 0, /* vec_to_scalar_cost. */
1375 2, /* scalar_to_vec_cost. */
1376 2, /* vec_align_load_cost. */
1377 2, /* vec_unalign_load_cost. */
1378 2, /* vec_store_cost. */
1379 2, /* cond_taken_branch_cost. */
1380 1, /* cond_not_taken_branch_cost. */
1383 static stringop_algs btver2_memcpy[2] = {
1384 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1385 {-1, rep_prefix_4_byte, false}}},
1386 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1387 {-1, libcall, false}}}};
1388 static stringop_algs btver2_memset[2] = {
1389 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1390 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1391 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1392 {-1, libcall, false}}}};
1393 const struct processor_costs btver2_cost = {
1394 COSTS_N_INSNS (1), /* cost of an add instruction */
1395 COSTS_N_INSNS (2), /* cost of a lea instruction */
1396 COSTS_N_INSNS (1), /* variable shift costs */
1397 COSTS_N_INSNS (1), /* constant shift costs */
1398 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1399 COSTS_N_INSNS (4), /* HI */
1400 COSTS_N_INSNS (3), /* SI */
1401 COSTS_N_INSNS (4), /* DI */
1402 COSTS_N_INSNS (5)}, /* other */
1403 0, /* cost of multiply per each bit set */
1404 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1405 COSTS_N_INSNS (35), /* HI */
1406 COSTS_N_INSNS (51), /* SI */
1407 COSTS_N_INSNS (83), /* DI */
1408 COSTS_N_INSNS (83)}, /* other */
1409 COSTS_N_INSNS (1), /* cost of movsx */
1410 COSTS_N_INSNS (1), /* cost of movzx */
1411 8, /* "large" insn */
1413 4, /* cost for loading QImode using movzbl */
1414 {3, 4, 3}, /* cost of loading integer registers
1415 in QImode, HImode and SImode.
1416 Relative to reg-reg move (2). */
1417 {3, 4, 3}, /* cost of storing integer registers */
1418 4, /* cost of reg,reg fld/fst */
1419 {4, 4, 12}, /* cost of loading fp registers
1420 in SFmode, DFmode and XFmode */
1421 {6, 6, 8}, /* cost of storing fp registers
1422 in SFmode, DFmode and XFmode */
1423 2, /* cost of moving MMX register */
1424 {3, 3}, /* cost of loading MMX registers
1425 in SImode and DImode */
1426 {4, 4}, /* cost of storing MMX registers
1427 in SImode and DImode */
1428 2, /* cost of moving SSE register */
1429 {4, 4, 3}, /* cost of loading SSE registers
1430 in SImode, DImode and TImode */
1431 {4, 4, 5}, /* cost of storing SSE registers
1432 in SImode, DImode and TImode */
1433 3, /* MMX or SSE register to integer */
1435 MOVD reg64, xmmreg Double FSTORE 4
1436 MOVD reg32, xmmreg Double FSTORE 4
1438 MOVD reg64, xmmreg Double FADD 3
1440 MOVD reg32, xmmreg Double FADD 3
1442 32, /* size of l1 cache. */
1443 2048, /* size of l2 cache. */
1444 64, /* size of prefetch block */
1445 100, /* number of parallel prefetches */
1446 2, /* Branch cost */
1447 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1448 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1449 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1450 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1451 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1452 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1455 4, /* scalar_stmt_cost. */
1456 2, /* scalar load_cost. */
1457 2, /* scalar_store_cost. */
1458 6, /* vec_stmt_cost. */
1459 0, /* vec_to_scalar_cost. */
1460 2, /* scalar_to_vec_cost. */
1461 2, /* vec_align_load_cost. */
1462 2, /* vec_unalign_load_cost. */
1463 2, /* vec_store_cost. */
1464 2, /* cond_taken_branch_cost. */
1465 1, /* cond_not_taken_branch_cost. */
1468 static stringop_algs pentium4_memcpy[2] = {
1469 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1470 DUMMY_STRINGOP_ALGS};
1471 static stringop_algs pentium4_memset[2] = {
1472 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1473 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1474 DUMMY_STRINGOP_ALGS};
1477 struct processor_costs pentium4_cost = {
1478 COSTS_N_INSNS (1), /* cost of an add instruction */
1479 COSTS_N_INSNS (3), /* cost of a lea instruction */
1480 COSTS_N_INSNS (4), /* variable shift costs */
1481 COSTS_N_INSNS (4), /* constant shift costs */
1482 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1483 COSTS_N_INSNS (15), /* HI */
1484 COSTS_N_INSNS (15), /* SI */
1485 COSTS_N_INSNS (15), /* DI */
1486 COSTS_N_INSNS (15)}, /* other */
1487 0, /* cost of multiply per each bit set */
1488 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1489 COSTS_N_INSNS (56), /* HI */
1490 COSTS_N_INSNS (56), /* SI */
1491 COSTS_N_INSNS (56), /* DI */
1492 COSTS_N_INSNS (56)}, /* other */
1493 COSTS_N_INSNS (1), /* cost of movsx */
1494 COSTS_N_INSNS (1), /* cost of movzx */
1495 16, /* "large" insn */
1497 2, /* cost for loading QImode using movzbl */
1498 {4, 5, 4}, /* cost of loading integer registers
1499 in QImode, HImode and SImode.
1500 Relative to reg-reg move (2). */
1501 {2, 3, 2}, /* cost of storing integer registers */
1502 2, /* cost of reg,reg fld/fst */
1503 {2, 2, 6}, /* cost of loading fp registers
1504 in SFmode, DFmode and XFmode */
1505 {4, 4, 6}, /* cost of storing fp registers
1506 in SFmode, DFmode and XFmode */
1507 2, /* cost of moving MMX register */
1508 {2, 2}, /* cost of loading MMX registers
1509 in SImode and DImode */
1510 {2, 2}, /* cost of storing MMX registers
1511 in SImode and DImode */
1512 12, /* cost of moving SSE register */
1513 {12, 12, 12}, /* cost of loading SSE registers
1514 in SImode, DImode and TImode */
1515 {2, 2, 8}, /* cost of storing SSE registers
1516 in SImode, DImode and TImode */
1517 10, /* MMX or SSE register to integer */
1518 8, /* size of l1 cache. */
1519 256, /* size of l2 cache. */
1520 64, /* size of prefetch block */
1521 6, /* number of parallel prefetches */
1522 2, /* Branch cost */
1523 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1524 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1525 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1526 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1527 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1528 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1531 1, /* scalar_stmt_cost. */
1532 1, /* scalar load_cost. */
1533 1, /* scalar_store_cost. */
1534 1, /* vec_stmt_cost. */
1535 1, /* vec_to_scalar_cost. */
1536 1, /* scalar_to_vec_cost. */
1537 1, /* vec_align_load_cost. */
1538 2, /* vec_unalign_load_cost. */
1539 1, /* vec_store_cost. */
1540 3, /* cond_taken_branch_cost. */
1541 1, /* cond_not_taken_branch_cost. */
1544 static stringop_algs nocona_memcpy[2] = {
1545 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1546 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1547 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1549 static stringop_algs nocona_memset[2] = {
1550 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1551 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1552 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1553 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1556 struct processor_costs nocona_cost = {
1557 COSTS_N_INSNS (1), /* cost of an add instruction */
1558 COSTS_N_INSNS (1), /* cost of a lea instruction */
1559 COSTS_N_INSNS (1), /* variable shift costs */
1560 COSTS_N_INSNS (1), /* constant shift costs */
1561 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1562 COSTS_N_INSNS (10), /* HI */
1563 COSTS_N_INSNS (10), /* SI */
1564 COSTS_N_INSNS (10), /* DI */
1565 COSTS_N_INSNS (10)}, /* other */
1566 0, /* cost of multiply per each bit set */
1567 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1568 COSTS_N_INSNS (66), /* HI */
1569 COSTS_N_INSNS (66), /* SI */
1570 COSTS_N_INSNS (66), /* DI */
1571 COSTS_N_INSNS (66)}, /* other */
1572 COSTS_N_INSNS (1), /* cost of movsx */
1573 COSTS_N_INSNS (1), /* cost of movzx */
1574 16, /* "large" insn */
1575 17, /* MOVE_RATIO */
1576 4, /* cost for loading QImode using movzbl */
1577 {4, 4, 4}, /* cost of loading integer registers
1578 in QImode, HImode and SImode.
1579 Relative to reg-reg move (2). */
1580 {4, 4, 4}, /* cost of storing integer registers */
1581 3, /* cost of reg,reg fld/fst */
1582 {12, 12, 12}, /* cost of loading fp registers
1583 in SFmode, DFmode and XFmode */
1584 {4, 4, 4}, /* cost of storing fp registers
1585 in SFmode, DFmode and XFmode */
1586 6, /* cost of moving MMX register */
1587 {12, 12}, /* cost of loading MMX registers
1588 in SImode and DImode */
1589 {12, 12}, /* cost of storing MMX registers
1590 in SImode and DImode */
1591 6, /* cost of moving SSE register */
1592 {12, 12, 12}, /* cost of loading SSE registers
1593 in SImode, DImode and TImode */
1594 {12, 12, 12}, /* cost of storing SSE registers
1595 in SImode, DImode and TImode */
1596 8, /* MMX or SSE register to integer */
1597 8, /* size of l1 cache. */
1598 1024, /* size of l2 cache. */
1599 64, /* size of prefetch block */
1600 8, /* number of parallel prefetches */
1601 1, /* Branch cost */
1602 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1603 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1604 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1605 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1606 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1607 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1610 1, /* scalar_stmt_cost. */
1611 1, /* scalar load_cost. */
1612 1, /* scalar_store_cost. */
1613 1, /* vec_stmt_cost. */
1614 1, /* vec_to_scalar_cost. */
1615 1, /* scalar_to_vec_cost. */
1616 1, /* vec_align_load_cost. */
1617 2, /* vec_unalign_load_cost. */
1618 1, /* vec_store_cost. */
1619 3, /* cond_taken_branch_cost. */
1620 1, /* cond_not_taken_branch_cost. */
1623 static stringop_algs atom_memcpy[2] = {
1624 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1625 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1626 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1627 static stringop_algs atom_memset[2] = {
1628 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1629 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1630 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1631 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1633 struct processor_costs atom_cost = {
1634 COSTS_N_INSNS (1), /* cost of an add instruction */
1635 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1636 COSTS_N_INSNS (1), /* variable shift costs */
1637 COSTS_N_INSNS (1), /* constant shift costs */
1638 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1639 COSTS_N_INSNS (4), /* HI */
1640 COSTS_N_INSNS (3), /* SI */
1641 COSTS_N_INSNS (4), /* DI */
1642 COSTS_N_INSNS (2)}, /* other */
1643 0, /* cost of multiply per each bit set */
1644 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1645 COSTS_N_INSNS (26), /* HI */
1646 COSTS_N_INSNS (42), /* SI */
1647 COSTS_N_INSNS (74), /* DI */
1648 COSTS_N_INSNS (74)}, /* other */
1649 COSTS_N_INSNS (1), /* cost of movsx */
1650 COSTS_N_INSNS (1), /* cost of movzx */
1651 8, /* "large" insn */
1652 17, /* MOVE_RATIO */
1653 4, /* cost for loading QImode using movzbl */
1654 {4, 4, 4}, /* cost of loading integer registers
1655 in QImode, HImode and SImode.
1656 Relative to reg-reg move (2). */
1657 {4, 4, 4}, /* cost of storing integer registers */
1658 4, /* cost of reg,reg fld/fst */
1659 {12, 12, 12}, /* cost of loading fp registers
1660 in SFmode, DFmode and XFmode */
1661 {6, 6, 8}, /* cost of storing fp registers
1662 in SFmode, DFmode and XFmode */
1663 2, /* cost of moving MMX register */
1664 {8, 8}, /* cost of loading MMX registers
1665 in SImode and DImode */
1666 {8, 8}, /* cost of storing MMX registers
1667 in SImode and DImode */
1668 2, /* cost of moving SSE register */
1669 {8, 8, 8}, /* cost of loading SSE registers
1670 in SImode, DImode and TImode */
1671 {8, 8, 8}, /* cost of storing SSE registers
1672 in SImode, DImode and TImode */
1673 5, /* MMX or SSE register to integer */
1674 32, /* size of l1 cache. */
1675 256, /* size of l2 cache. */
1676 64, /* size of prefetch block */
1677 6, /* number of parallel prefetches */
1678 3, /* Branch cost */
1679 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1680 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1681 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1682 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1683 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1684 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1687 1, /* scalar_stmt_cost. */
1688 1, /* scalar load_cost. */
1689 1, /* scalar_store_cost. */
1690 1, /* vec_stmt_cost. */
1691 1, /* vec_to_scalar_cost. */
1692 1, /* scalar_to_vec_cost. */
1693 1, /* vec_align_load_cost. */
1694 2, /* vec_unalign_load_cost. */
1695 1, /* vec_store_cost. */
1696 3, /* cond_taken_branch_cost. */
1697 1, /* cond_not_taken_branch_cost. */
1700 static stringop_algs slm_memcpy[2] = {
1701 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1702 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1703 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1704 static stringop_algs slm_memset[2] = {
1705 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1706 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1707 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1708 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Cost model used when tuning for Silvermont; referenced by the
   "silvermont" and "knl" rows of processor_target_table below.  */
struct processor_costs slm_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),	/* HI */
   COSTS_N_INSNS (3),	/* SI */
   COSTS_N_INSNS (4),	/* DI */
   COSTS_N_INSNS (2)},	/* other */
  0,	/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),	/* HI */
   COSTS_N_INSNS (42),	/* SI */
   COSTS_N_INSNS (74),	/* DI */
   COSTS_N_INSNS (74)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,	/* "large" insn */
  17,	/* MOVE_RATIO */
  4,	/* cost for loading QImode using movzbl */
  {4, 4, 4},	/* cost of loading integer registers
		   in QImode, HImode and SImode.
		   Relative to reg-reg move (2).  */
  {4, 4, 4},	/* cost of storing integer registers */
  4,	/* cost of reg,reg fld/fst */
  {12, 12, 12},	/* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {6, 6, 8},	/* cost of storing fp registers
		   in SFmode, DFmode and XFmode */
  2,	/* cost of moving MMX register */
  {8, 8},	/* cost of loading MMX registers
		   in SImode and DImode */
  {8, 8},	/* cost of storing MMX registers
		   in SImode and DImode */
  2,	/* cost of moving SSE register */
  {8, 8, 8},	/* cost of loading SSE registers
		   in SImode, DImode and TImode */
  {8, 8, 8},	/* cost of storing SSE registers
		   in SImode, DImode and TImode */
  5,	/* MMX or SSE register to integer */
  32,	/* size of l1 cache.  */
  256,	/* size of l2 cache.  */
  64,	/* size of prefetch block */
  6,	/* number of parallel prefetches */
  3,	/* Branch cost */
  COSTS_N_INSNS (8),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),	/* cost of FSQRT instruction.  */
  /* NOTE(review): the stringop table fields (presumably slm_memcpy /
     slm_memset, defined just above but otherwise unused) appear to be
     missing here -- this copy of the file looks truncated.  */
  1,	/* scalar_stmt_cost.  */
  1,	/* scalar load_cost.  */
  1,	/* scalar_store_cost.  */
  1,	/* vec_stmt_cost.  */
  4,	/* vec_to_scalar_cost.  */
  1,	/* scalar_to_vec_cost.  */
  1,	/* vec_align_load_cost.  */
  2,	/* vec_unalign_load_cost.  */
  1,	/* vec_store_cost.  */
  3,	/* cond_taken_branch_cost.  */
  1,	/* cond_not_taken_branch_cost.  */
1777 static stringop_algs intel_memcpy[2] = {
1778 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1779 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1780 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1781 static stringop_algs intel_memset[2] = {
1782 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1783 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1784 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1785 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Cost model for -mtune=intel; referenced by the "intel" row of
   processor_target_table below.  */
struct processor_costs intel_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),	/* HI */
   COSTS_N_INSNS (3),	/* SI */
   COSTS_N_INSNS (4),	/* DI */
   COSTS_N_INSNS (2)},	/* other */
  0,	/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),	/* HI */
   COSTS_N_INSNS (42),	/* SI */
   COSTS_N_INSNS (74),	/* DI */
   COSTS_N_INSNS (74)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,	/* "large" insn */
  17,	/* MOVE_RATIO */
  4,	/* cost for loading QImode using movzbl */
  {4, 4, 4},	/* cost of loading integer registers
		   in QImode, HImode and SImode.
		   Relative to reg-reg move (2).  */
  {4, 4, 4},	/* cost of storing integer registers */
  4,	/* cost of reg,reg fld/fst */
  {12, 12, 12},	/* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {6, 6, 8},	/* cost of storing fp registers
		   in SFmode, DFmode and XFmode */
  2,	/* cost of moving MMX register */
  {8, 8},	/* cost of loading MMX registers
		   in SImode and DImode */
  {8, 8},	/* cost of storing MMX registers
		   in SImode and DImode */
  2,	/* cost of moving SSE register */
  {8, 8, 8},	/* cost of loading SSE registers
		   in SImode, DImode and TImode */
  {8, 8, 8},	/* cost of storing SSE registers
		   in SImode, DImode and TImode */
  5,	/* MMX or SSE register to integer */
  32,	/* size of l1 cache.  */
  256,	/* size of l2 cache.  */
  64,	/* size of prefetch block */
  6,	/* number of parallel prefetches */
  3,	/* Branch cost */
  COSTS_N_INSNS (8),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),	/* cost of FSQRT instruction.  */
  /* NOTE(review): the stringop table fields (presumably intel_memcpy /
     intel_memset, defined just above but otherwise unused) appear to
     be missing here -- this copy of the file looks truncated.  */
  1,	/* scalar_stmt_cost.  */
  1,	/* scalar load_cost.  */
  1,	/* scalar_store_cost.  */
  1,	/* vec_stmt_cost.  */
  4,	/* vec_to_scalar_cost.  */
  1,	/* scalar_to_vec_cost.  */
  1,	/* vec_align_load_cost.  */
  2,	/* vec_unalign_load_cost.  */
  1,	/* vec_store_cost.  */
  3,	/* cond_taken_branch_cost.  */
  1,	/* cond_not_taken_branch_cost.  */
1854 /* Generic should produce code tuned for Core-i7 (and newer chips)
1855 and btver1 (and newer chips). */
1857 static stringop_algs generic_memcpy[2] = {
1858 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1859 {-1, libcall, false}}},
1860 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1861 {-1, libcall, false}}}};
1862 static stringop_algs generic_memset[2] = {
1863 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1864 {-1, libcall, false}}},
1865 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1866 {-1, libcall, false}}}};
/* Cost model for -mtune=generic; referenced by the "generic" row of
   processor_target_table below.  */
struct processor_costs generic_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles and more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),	/* HI */
   COSTS_N_INSNS (3),	/* SI */
   COSTS_N_INSNS (4),	/* DI */
   COSTS_N_INSNS (2)},	/* other */
  0,	/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),	/* HI */
   COSTS_N_INSNS (42),	/* SI */
   COSTS_N_INSNS (74),	/* DI */
   COSTS_N_INSNS (74)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,	/* "large" insn */
  17,	/* MOVE_RATIO */
  4,	/* cost for loading QImode using movzbl */
  {4, 4, 4},	/* cost of loading integer registers
		   in QImode, HImode and SImode.
		   Relative to reg-reg move (2).  */
  {4, 4, 4},	/* cost of storing integer registers */
  4,	/* cost of reg,reg fld/fst */
  {12, 12, 12},	/* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {6, 6, 8},	/* cost of storing fp registers
		   in SFmode, DFmode and XFmode */
  2,	/* cost of moving MMX register */
  {8, 8},	/* cost of loading MMX registers
		   in SImode and DImode */
  {8, 8},	/* cost of storing MMX registers
		   in SImode and DImode */
  2,	/* cost of moving SSE register */
  {8, 8, 8},	/* cost of loading SSE registers
		   in SImode, DImode and TImode */
  {8, 8, 8},	/* cost of storing SSE registers
		   in SImode, DImode and TImode */
  5,	/* MMX or SSE register to integer */
  32,	/* size of l1 cache.  */
  512,	/* size of l2 cache.  */
  64,	/* size of prefetch block */
  6,	/* number of parallel prefetches */
  /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
     value is increased to perhaps more appropriate value of 5.  */
  3,	/* Branch cost */
  COSTS_N_INSNS (8),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),	/* cost of FSQRT instruction.  */
  /* NOTE(review): the stringop table fields (presumably generic_memcpy /
     generic_memset, defined just above but otherwise unused) appear to
     be missing here -- this copy of the file looks truncated.  */
  1,	/* scalar_stmt_cost.  */
  1,	/* scalar load_cost.  */
  1,	/* scalar_store_cost.  */
  1,	/* vec_stmt_cost.  */
  1,	/* vec_to_scalar_cost.  */
  1,	/* scalar_to_vec_cost.  */
  1,	/* vec_align_load_cost.  */
  2,	/* vec_unalign_load_cost.  */
  1,	/* vec_store_cost.  */
  3,	/* cond_taken_branch_cost.  */
  1,	/* cond_not_taken_branch_cost.  */
/* core_cost should produce code tuned for Core family of CPUs.  */
1942 static stringop_algs core_memcpy[2] = {
1943 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1944 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1945 {-1, libcall, false}}}};
/* memset stringop strategy for the Core family tunings.
   NOTE(review): an initializer row appears to be missing between the
   loop_1_byte and rep_prefix_4_byte entries (this copy of the file
   looks truncated) -- compare with upstream before relying on it.  */
static stringop_algs core_memset[2] = {
  {libcall, {{6, loop_1_byte, true},
	     {8192, rep_prefix_4_byte, true},
	     {-1, libcall, false}}},
  {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
             {-1, libcall, false}}}};
/* Cost model shared by the "core2", "nehalem", "sandybridge" and
   "haswell" rows of processor_target_table below.  */
struct processor_costs core_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles and more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),	/* HI */
   COSTS_N_INSNS (3),	/* SI */
   COSTS_N_INSNS (4),	/* DI */
   COSTS_N_INSNS (2)},	/* other */
  0,	/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),	/* HI */
   COSTS_N_INSNS (42),	/* SI */
   COSTS_N_INSNS (74),	/* DI */
   COSTS_N_INSNS (74)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,	/* "large" insn */
  17,	/* MOVE_RATIO */
  4,	/* cost for loading QImode using movzbl */
  {4, 4, 4},	/* cost of loading integer registers
		   in QImode, HImode and SImode.
		   Relative to reg-reg move (2).  */
  {4, 4, 4},	/* cost of storing integer registers */
  4,	/* cost of reg,reg fld/fst */
  {12, 12, 12},	/* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {6, 6, 8},	/* cost of storing fp registers
		   in SFmode, DFmode and XFmode */
  2,	/* cost of moving MMX register */
  {8, 8},	/* cost of loading MMX registers
		   in SImode and DImode */
  {8, 8},	/* cost of storing MMX registers
		   in SImode and DImode */
  2,	/* cost of moving SSE register */
  {8, 8, 8},	/* cost of loading SSE registers
		   in SImode, DImode and TImode */
  {8, 8, 8},	/* cost of storing SSE registers
		   in SImode, DImode and TImode */
  5,	/* MMX or SSE register to integer */
  64,	/* size of l1 cache.  */
  512,	/* size of l2 cache.  */
  64,	/* size of prefetch block */
  6,	/* number of parallel prefetches */
  /* FIXME perhaps more appropriate value is 5.  */
  3,	/* Branch cost */
  COSTS_N_INSNS (8),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),	/* cost of FSQRT instruction.  */
  /* NOTE(review): the stringop table fields (presumably core_memcpy /
     core_memset, defined just above but otherwise unused) appear to be
     missing here -- this copy of the file looks truncated.  */
  1,	/* scalar_stmt_cost.  */
  1,	/* scalar load_cost.  */
  1,	/* scalar_store_cost.  */
  1,	/* vec_stmt_cost.  */
  1,	/* vec_to_scalar_cost.  */
  1,	/* scalar_to_vec_cost.  */
  1,	/* vec_align_load_cost.  */
  2,	/* vec_unalign_load_cost.  */
  1,	/* vec_store_cost.  */
  3,	/* cond_taken_branch_cost.  */
  1,	/* cond_not_taken_branch_cost.  */
/* Set by -mtune.  NOTE(review): &pentium_cost here looks like a
   placeholder overwritten during option processing -- confirm in
   the option-override code.  */
const struct processor_costs *ix86_tune_cost = &pentium_cost;

/* Set by -mtune or -Os.  */
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  One bit per
   PROCESSOR_* enumerator; the m_* composites below OR together
   related single-processor masks.  */
/* Intel processors.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_NEHALEM (1<<PROCESSOR_NEHALEM)
#define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
#define m_HASWELL (1<<PROCESSOR_HASWELL)
#define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
#define m_BONNELL (1<<PROCESSOR_BONNELL)
#define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
#define m_KNL (1<<PROCESSOR_KNL)
#define m_INTEL (1<<PROCESSOR_INTEL)
/* AMD processors.  */
#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
#define m_BDVER3 (1<<PROCESSOR_BDVER3)
#define m_BDVER4 (1<<PROCESSOR_BDVER4)
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
#define m_BTVER2 (1<<PROCESSOR_BTVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
#define m_BTVER (m_BTVER1 | m_BTVER2)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)

#define m_GENERIC (1<<PROCESSOR_GENERIC)
/* Human-readable names of the X86_TUNE_* flags, generated from
   x86-tune.def.  NOTE(review): the matching "#undef DEF_TUNE" and
   closing "};" lines appear to be missing in this copy of the file.  */
const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
#define DEF_TUNE(tune, name, selector) name,
#include "x86-tune.def"

/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];

/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  Also generated from x86-tune.def; the
   same truncation note as above applies.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
#define DEF_TUNE(tune, name, selector) selector,
#include "x86-tune.def"
/* Feature tests against the various architecture variations.  */
unsigned char ix86_arch_features[X86_ARCH_LAST];

/* Feature tests against the various architecture variations, used to create
   ix86_arch_features based on the processor mask.  NOTE(review): the
   mask values for the CMPXCHG/CMPXCHG8B/XADD/BSWAP entries and the
   closing brace appear to be missing in this copy of the file.  */
static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOV: Conditional move was added for pentiumpro.  */
  ~(m_386 | m_486 | m_PENT | m_K6),

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */

  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */

  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.
   NOTE(review): the opening brace, several rows and the closing brace
   of this initializer appear to be missing in this copy.  */
enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* x87 stack registers (FP_TOP_REG is st(0), FP_SECOND_REG st(1)).  */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers.  */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* MMX registers.  */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  /* REX integer registers -- presumably r8-r15; confirm ordering
     against FIRST_PSEUDO_REGISTER layout in i386.h.  */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* AVX-512 SSE registers */
  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
  /* Mask registers.  */
  MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
  MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
  /* MPX bound registers */
  BND_REGS, BND_REGS, BND_REGS, BND_REGS,
/* The "default" register map used in 32bit mode: maps GCC register
   numbers to debugger (DBX/DWARF) register numbers; -1 = no debug
   register assigned.  */
int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* AVX-512 registers 16-23*/
  -1, -1, -1, -1, -1, -1, -1, -1,	/* AVX-512 registers 24-31*/
  93, 94, 95, 96, 97, 98, 99, 100,	/* Mask registers */
  101, 102, 103, 104,			/* bound registers */
/* The "default" register map used in 64bit mode: maps GCC register
   numbers to debugger register numbers; -1 = no debug register.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8,9,10,11,12,13,14,15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
  67, 68, 69, 70, 71, 72, 73, 74,	/* AVX-512 registers 16-23 */
  75, 76, 77, 78, 79, 80, 81, 82,	/* AVX-512 registers 24-31 */
  118, 119, 120, 121, 122, 123, 124, 125,	/* Mask registers */
  126, 127, 128, 129,			/* bound registers */
2196 /* Define the register numbers to be used in Dwarf debugging information.
2197 The SVR4 reference port C compiler uses the following register numbers
2198 in its Dwarf output code:
2199 0 for %eax (gcc regno = 0)
2200 1 for %ecx (gcc regno = 2)
2201 2 for %edx (gcc regno = 1)
2202 3 for %ebx (gcc regno = 3)
2203 4 for %esp (gcc regno = 7)
2204 5 for %ebp (gcc regno = 6)
2205 6 for %esi (gcc regno = 4)
2206 7 for %edi (gcc regno = 5)
2207 The following three DWARF register numbers are never generated by
2208 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2209 believes these numbers have these meanings.
2210 8 for %eip (no gcc equivalent)
2211 9 for %eflags (gcc regno = 17)
2212 10 for %trapno (no gcc equivalent)
2213 It is not at all clear how we should number the FP stack registers
2214 for the x86 architecture. If the version of SDB on x86/svr4 were
2215 a bit less brain dead with respect to floating-point then we would
2216 have a precedent to follow with respect to DWARF register numbers
2217 for x86 FP registers, but the SDB on x86/svr4 is so completely
2218 broken with respect to FP registers that it is hardly worth thinking
2219 of it as something to strive for compatibility with.
2220 The version of x86/svr4 SDB I have at the moment does (partially)
2221 seem to believe that DWARF register number 11 is associated with
2222 the x86 register %st(0), but that's about all. Higher DWARF
2223 register numbers don't seem to be associated with anything in
2224 particular, and even for DWARF regno 11, SDB only seems to under-
2225 stand that it should say that a variable lives in %st(0) (when
2226 asked via an `=' command) if we said it was in DWARF regno 11,
2227 but SDB still prints garbage when asked for the value of the
2228 variable in question (via a `/' command).
2229 (Also note that the labels SDB prints for various FP stack regs
2230 when doing an `x' command are all wrong.)
2231 Note that these problems generally don't affect the native SVR4
2232 C compiler because it doesn't allow the use of -O with -g and
2233 because when it is *not* optimizing, it allocates a memory
2234 location for each floating-point variable, and the memory
2235 location is what gets described in the DWARF AT_location
2236 attribute for the variable in question.
2237 Regardless of the severe mental illness of the x86/svr4 SDB, we
2238 do something sensible here and we use the following DWARF
2239 register numbers. Note that these are all stack-top-relative
2241 11 for %st(0) (gcc regno = 8)
2242 12 for %st(1) (gcc regno = 9)
2243 13 for %st(2) (gcc regno = 10)
2244 14 for %st(3) (gcc regno = 11)
2245 15 for %st(4) (gcc regno = 12)
2246 16 for %st(5) (gcc regno = 13)
2247 17 for %st(6) (gcc regno = 14)
2248 18 for %st(7) (gcc regno = 15)
/* SVR4 DWARF register numbering (see the long comment above for the
   rationale behind the general-reg and FP-stack assignments).  */
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* AVX-512 registers 16-23*/
  -1, -1, -1, -1, -1, -1, -1, -1,	/* AVX-512 registers 24-31*/
  93, 94, 95, 96, 97, 98, 99, 100,	/* Mask registers */
  101, 102, 103, 104,			/* bound registers */
/* Define parameter passing and return registers.  NOTE(review): the
   brace lines of these initializers appear to be missing in this copy
   of the file.  */

/* Integer argument registers, in argument order.  */
static int const x86_64_int_parameter_registers[6] =
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG

/* Integer argument registers for the MS ABI, in argument order.  */
static int const x86_64_ms_abi_int_parameter_registers[4] =
  CX_REG, DX_REG, R8_REG, R9_REG

/* Integer value-return registers.  */
static int const x86_64_int_return_registers[4] =
  AX_REG, DX_REG, DI_REG, SI_REG

/* Additional registers that are clobbered by SYSV calls.
   NOTE(review): only 8 of the declared 12 entries are visible here --
   the first rows of the initializer look truncated.  */
int const x86_64_ms_sysv_extra_clobbered_registers[12] =
  XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
  XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
/* Define the structure for the machine field in struct function.  */

/* One cached stack-local slot, chained into a list via NEXT.
   NOTE(review): some members and the closing brace appear to be
   missing in this copy of the file.  */
struct GTY(()) stack_local_entry {
  unsigned short mode;
  struct stack_local_entry *next;

/* Structure describing stack frame layout.
   Stack grows downward:
   (NOTE(review): most of the frame diagram, the "struct ix86_frame {"
   opener and several members appear to be missing in this copy.)
   saved static chain if ix86_static_chain_on_stack
   saved frame pointer if frame_pointer_needed
						<- HARD_FRAME_POINTER
						<- sse_regs_save_offset
   [va_arg registers]  |
   [padding2]	       | = to_allocate  */
  int outgoing_arguments_size;

  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
  HOST_WIDE_INT hfp_save_offset;
  HOST_WIDE_INT reg_save_offset;
  HOST_WIDE_INT sse_reg_save_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if processor has SSE prefetch instruction.  */
unsigned char x86_prefetch_sse;

/* -mstackrealign option */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";

/* Indirections through the generators below let the rest of the file
   emit SImode or DImode variants of these patterns without testing
   TARGET_64BIT at every call site.  */
static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
static unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
static unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* Calling abi specific va_list type nodes.  */
static GTY(()) tree sysv_va_list_type_node;
static GTY(()) tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
/* Fence to use after loop using movnt.  NOTE(review): the variable
   this comment describes appears to be missing in this copy.  */

/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).
   NOTE(review): the opening brace, several enumerators and the closing
   brace of this enum appear to be missing in this copy.  */
enum x86_64_reg_class
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_COMPLEX_X87_CLASS,
#define MAX_CLASSES 8

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
/* Forward declarations of static helpers defined later in this file.
   NOTE(review): a few continuation lines of these prototypes appear
   to be missing in this copy.  */
static struct machine_function * ix86_init_machine_status (void);
static rtx ix86_function_value (const_tree, const_tree, bool);
static bool ix86_function_value_regno_p (const unsigned int);
static unsigned int ix86_function_arg_boundary (machine_mode,
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (struct ix86_frame *);
static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
static void ix86_add_new_builtins (HOST_WIDE_INT);
static tree ix86_canonical_va_list_type (tree);
static void predict_jump (int);
static unsigned int split_stack_prologue_scratch_regno (void);
static bool i386_asm_output_addr_const_extra (FILE *, rtx);

/* Indices into the option-string array used when saving/printing
   function-specific target options.  */
enum ix86_function_specific_strings
  IX86_FUNCTION_SPECIFIC_ARCH,
  IX86_FUNCTION_SPECIFIC_TUNE,
  IX86_FUNCTION_SPECIFIC_MAX

static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
				 const char *, enum fpmath_unit, bool);
static void ix86_function_specific_save (struct cl_target_option *,
					 struct gcc_options *opts);
static void ix86_function_specific_restore (struct gcc_options *opts,
					    struct cl_target_option *);
static void ix86_function_specific_print (FILE *, int,
					  struct cl_target_option *);
static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
static bool ix86_valid_target_attribute_inner_p (tree, char *[],
						 struct gcc_options *,
						 struct gcc_options *,
						 struct gcc_options *);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);

static enum calling_abi ix86_function_abi (const_tree);
#ifndef SUBTARGET32_DEFAULT_CPU
#define SUBTARGET32_DEFAULT_CPU "i386"
/* NOTE(review): the matching #endif appears to be missing in this
   copy of the file.  */

/* Whether -mtune= or -march= were specified */
static int ix86_tune_defaulted;
static int ix86_arch_specified;

/* Vectorization library interface and handlers.  */
static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);

static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
/* Processor target table, indexed by processor number */
/* NOTE(review): the "struct ptt {" opener for the members below and
   the closing braces of both the struct and the table appear to be
   missing in this copy of the file.  */
  const char *const name;		/* processor name  */
  const struct processor_costs *cost;	/* Processor costs */
  const int align_loop;			/* Default alignments.  */
  const int align_loop_max_skip;
  const int align_jump;
  const int align_jump_max_skip;
  const int align_func;

/* This table must be in sync with enum processor_type in i386.h.  */
static const struct ptt processor_target_table[PROCESSOR_max] =
  {"generic", &generic_cost, 16, 10, 16, 10, 16},
  {"i386", &i386_cost, 4, 3, 4, 3, 4},
  {"i486", &i486_cost, 16, 15, 16, 15, 16},
  {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
  {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
  {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
  {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
  {"core2", &core_cost, 16, 10, 16, 10, 16},
  {"nehalem", &core_cost, 16, 10, 16, 10, 16},
  {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
  {"haswell", &core_cost, 16, 10, 16, 10, 16},
  {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
  {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
  {"knl", &slm_cost, 16, 15, 16, 7, 16},
  {"intel", &intel_cost, 16, 15, 16, 7, 16},
  {"geode", &geode_cost, 0, 0, 0, 0, 0},
  {"k6", &k6_cost, 32, 7, 32, 7, 32},
  {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
  {"k8", &k8_cost, 16, 7, 16, 7, 16},
  {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
  {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
  {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
  {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
  {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
  {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
  {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
/* Worker for the vzeroupper insertion pass: disables mode switching
   for every 386 entity except AVX_U128, then re-runs the generic
   mode-switching pass.  NOTE(review): the return-type line, opening
   brace, declaration of "i" and the function's closing lines appear
   to be missing in this copy of the file.  */
rest_of_handle_insert_vzeroupper (void)
  /* vzeroupper instructions are inserted immediately after reload to
     account for possible spills from 256bit registers.  The pass
     reuses mode switching infrastructure by re-running mode insertion
     pass, so disable entities that have already been processed.  */
  for (i = 0; i < MAX_386_ENTITIES; i++)
    ix86_optimize_mode_switching[i] = 0;

  ix86_optimize_mode_switching[AVX_U128] = 1;

  /* Call optimize_mode_switching.  */
  g->get_passes ()->execute_pass_mode_switching ();
/* Pass metadata for the post-reload vzeroupper insertion RTL pass.
   Requests a DF finish (TODO_df_finish) when the pass completes.  */
2551 const pass_data pass_data_insert_vzeroupper =
2553 RTL_PASS, /* type */
2554 "vzeroupper", /* name */
2555 OPTGROUP_NONE, /* optinfo_flags */
2556 TV_NONE, /* tv_id */
2557 0, /* properties_required */
2558 0, /* properties_provided */
2559 0, /* properties_destroyed */
2560 0, /* todo_flags_start */
2561 TODO_df_finish, /* todo_flags_finish */
/* RTL pass that inserts vzeroupper instructions; only gated in when AVX
   (but not AVX512F) code generation is active and -mvzeroupper is on.  */
2564 class pass_insert_vzeroupper : public rtl_opt_pass
2567 pass_insert_vzeroupper(gcc::context *ctxt)
2568 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2571 /* opt_pass methods: */
2572 virtual bool gate (function *)
2574 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
/* Delegate the actual work to the shared worker above.  */
2577 virtual unsigned int execute (function *)
2579 return rest_of_handle_insert_vzeroupper ();
2582 }; // class pass_insert_vzeroupper
/* Factory hook used by the pass manager to instantiate the vzeroupper
   insertion pass.  Caller owns the returned object.  */
2587 make_pass_insert_vzeroupper (gcc::context *ctxt)
2589 return new pass_insert_vzeroupper (ctxt);
2592 /* Return true if a red-zone is in use.  The red zone is never used
   under the 64-bit MS ABI.  */
2595 ix86_using_red_zone (void)
2597 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2600 /* Return a string that documents the current -m options. The caller is
2601 responsible for freeing the string.  ISA is the bitmask of ISA
   options, FLAGS the bitmask of target flags, ARCH/TUNE the -march=/
   -mtune= names (may be elided at the call site), FPMATH the FP math
   unit selection.  NOTE(review): several parameters (e.g. add_nl_p) and
   numerous statements are elided from this chunk.  */
2604 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2605 const char *tune, enum fpmath_unit fpmath,
/* Maps an option string to the isa/flag mask bit(s) it represents.  */
2608 struct ix86_target_opts
2610 const char *option; /* option string */
2611 HOST_WIDE_INT mask; /* isa mask options */
2614 /* This table is ordered so that options like -msse4.2 that imply
2615 preceding options will match those first. */
2616 static struct ix86_target_opts isa_opts[] =
2618 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2619 { "-mfma", OPTION_MASK_ISA_FMA },
2620 { "-mxop", OPTION_MASK_ISA_XOP },
2621 { "-mlwp", OPTION_MASK_ISA_LWP },
2622 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2623 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2624 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2625 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2626 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2627 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2628 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2629 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2630 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2631 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2632 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2633 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2634 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2635 { "-msse3", OPTION_MASK_ISA_SSE3 },
2636 { "-msse2", OPTION_MASK_ISA_SSE2 },
2637 { "-msse", OPTION_MASK_ISA_SSE },
2638 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2639 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2640 { "-mmmx", OPTION_MASK_ISA_MMX },
2641 { "-mabm", OPTION_MASK_ISA_ABM },
2642 { "-mbmi", OPTION_MASK_ISA_BMI },
2643 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2644 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2645 { "-mhle", OPTION_MASK_ISA_HLE },
2646 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2647 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2648 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2649 { "-madx", OPTION_MASK_ISA_ADX },
2650 { "-mtbm", OPTION_MASK_ISA_TBM },
2651 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2652 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2653 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2654 { "-maes", OPTION_MASK_ISA_AES },
2655 { "-msha", OPTION_MASK_ISA_SHA },
2656 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2657 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2658 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2659 { "-mf16c", OPTION_MASK_ISA_F16C },
2660 { "-mrtm", OPTION_MASK_ISA_RTM },
2661 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2662 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2663 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2664 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2665 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2666 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2667 { "-mmpx", OPTION_MASK_ISA_MPX },
2668 { "-mclwb", OPTION_MASK_ISA_CLWB },
2669 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
/* Non-ISA target flags and the option names that set them.  */
2673 static struct ix86_target_opts flag_opts[] =
2675 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2676 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2677 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2678 { "-m80387", MASK_80387 },
2679 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2680 { "-malign-double", MASK_ALIGN_DOUBLE },
2681 { "-mcld", MASK_CLD },
2682 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2683 { "-mieee-fp", MASK_IEEE_FP },
2684 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2685 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2686 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2687 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2688 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2689 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2690 { "-mno-red-zone", MASK_NO_RED_ZONE },
2691 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2692 { "-mrecip", MASK_RECIP },
2693 { "-mrtd", MASK_RTD },
2694 { "-msseregparm", MASK_SSEREGPARM },
2695 { "-mstack-arg-probe", MASK_STACK_PROBE },
2696 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2697 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2698 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2699 { "-mvzeroupper", MASK_VZEROUPPER },
2700 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2701 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2702 { "-mprefer-avx128", MASK_PREFER_AVX128},
/* opts collects [option-prefix, option-argument] pairs; the +6 leaves
   room for -march=, -mtune=, the ABI switch, -mfpmath= and the two
   "(other ...)" catch-all entries.  */
2705 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2708 char target_other[40];
2718 memset (opts, '\0', sizeof (opts));
2720 /* Add -march= option. */
2723 opts[num][0] = "-march=";
2724 opts[num++][1] = arch;
2727 /* Add -mtune= option. */
2730 opts[num][0] = "-mtune=";
2731 opts[num++][1] = tune;
2734 /* Add -m32/-m64/-mx32. */
2735 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2737 if ((isa & OPTION_MASK_ABI_64) != 0)
/* Strip the bits the ABI switch already documents.  */
2741 isa &= ~ (OPTION_MASK_ISA_64BIT
2742 | OPTION_MASK_ABI_64
2743 | OPTION_MASK_ABI_X32);
2747 opts[num++][0] = abi;
2749 /* Pick out the options in isa options. */
2750 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2752 if ((isa & isa_opts[i].mask) != 0)
2754 opts[num++][0] = isa_opts[i].option;
2755 isa &= ~ isa_opts[i].mask;
/* Any ISA bits not covered by the table get a hex catch-all entry.  */
2759 if (isa && add_nl_p)
2761 opts[num++][0] = isa_other;
2762 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2766 /* Add flag options. */
2767 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2769 if ((flags & flag_opts[i].mask) != 0)
2771 opts[num++][0] = flag_opts[i].option;
2772 flags &= ~ flag_opts[i].mask;
/* Likewise a catch-all for unknown target flags.  */
2776 if (flags && add_nl_p)
2778 opts[num++][0] = target_other;
2779 sprintf (target_other, "(other flags: %#x)", flags);
2782 /* Add -fpmath= option. */
2785 opts[num][0] = "-mfpmath=";
2786 switch ((int) fpmath)
2789 opts[num++][1] = "387";
2793 opts[num++][1] = "sse";
2796 case FPMATH_387 | FPMATH_SSE:
2797 opts[num++][1] = "sse+387";
2809 gcc_assert (num < ARRAY_SIZE (opts));
2811 /* Size the string. */
/* Separator is " \\\n" (3 chars) when wrapping lines, else " ".  */
2813 sep_len = (add_nl_p) ? 3 : 1;
2814 for (i = 0; i < num; i++)
2817 for (j = 0; j < 2; j++)
2819 len += strlen (opts[i][j]);
2822 /* Build the string. */
2823 ret = ptr = (char *) xmalloc (len);
2826 for (i = 0; i < num; i++)
2830 for (j = 0; j < 2; j++)
2831 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
/* Wrap at roughly 70 columns when newlines were requested.  */
2838 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2846 for (j = 0; j < 2; j++)
2849 memcpy (ptr, opts[i][j], len2[j]);
2851 line_len += len2[j];
/* Make sure the buffer was not overrun.  */
2856 gcc_assert (ret + len >= ptr);
2861 /* Return true, if profiling code should be emitted before
2862 prologue. Otherwise it returns false.
2863 Note: this is the case when -mfentry ("hotfix"-style __fentry__
   instrumentation) is in effect.  */
2865 ix86_profile_before_prologue (void)
2867 return flag_fentry != 0;
2870 /* Function that is callable from the debugger to print the current
   target option string to stderr.  NOTE(review): the free of the
   returned string is elided from this chunk -- confirm it is freed.  */
2872 void ATTRIBUTE_UNUSED
2873 ix86_debug_options (void)
2875 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2876 ix86_arch_string, ix86_tune_string,
2881 fprintf (stderr, "%s\n\n", opts);
/* Elided else-branch: nothing was set.  */
2885 fputs ("<no options>\n\n", stderr);
/* Human-readable names of the string-operation algorithms, generated
   from stringop.def and indexed in the same order as enum stringop_alg
   (see the strcmp loop in ix86_parse_stringop_strategy_string).  */
2890 static const char *stringop_alg_names[] = {
2892 #define DEF_ALG(alg, name) #name,
2893 #include "stringop.def"
2898 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2899 The string is of the following form (or comma separated list of it):
2901 strategy_alg:max_size:[align|noalign]
2903 where the full size range for the strategy is either [0, max_size] or
2904 [min_size, max_size], in which min_size is the max_size + 1 of the
2905 preceding range. The last size range must have max_size == -1.
2910 -mmemcpy-strategy=libcall:-1:noalign
2912 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2916 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2918 This is to tell the compiler to use the following strategy for memset
2919 1) when the expected size is between [1, 16], use rep_8byte strategy;
2920 2) when the size is between [17, 2048], use vector_loop;
2921 3) when the size is > 2048, use libcall. */
/* One parsed range entry: inclusive upper bound, algorithm, alignment.  */
2923 struct stringop_size_range
/* STRATEGY_STR is modified in place (commas are overwritten with NULs);
   IS_MEMSET selects which default algorithm table to override.  */
2931 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2933 const struct stringop_algs *default_algs;
2934 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2935 char *curr_range_str, *next_range_str;
/* Pick the 32-bit or 64-bit default table for memset vs. memcpy.  */
2939 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2941 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2943 curr_range_str = strategy_str;
/* Split the comma-separated list, destructively, one range at a time.  */
2950 next_range_str = strchr (curr_range_str, ',');
2952 *next_range_str++ = '\0';
2954 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2955 alg_name, &maxs, align))
2957 error ("wrong arg %s to option %s", curr_range_str,
2958 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=")%;
2962 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2964 error ("size ranges of option %s should be increasing",
2965 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* Translate the algorithm name to its enum value by table lookup.  */
2969 for (i = 0; i < last_alg; i++)
2970 if (!strcmp (alg_name, stringop_alg_names[i]))
2975 error ("wrong stringop strategy name %s specified for option %s",
2977 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2981 input_ranges[n].max = maxs;
2982 input_ranges[n].alg = (stringop_alg) i;
2983 if (!strcmp (align, "align"))
2984 input_ranges[n].noalign = false;
2985 else if (!strcmp (align, "noalign"))
2986 input_ranges[n].noalign = true;
2989 error ("unknown alignment %s specified for option %s",
2990 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2994 curr_range_str = next_range_str;
2996 while (curr_range_str);
2998 if (input_ranges[n - 1].max != -1)
3000 error ("the max value for the last size range should be -1"
3002 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* NOTE(review): this bound check runs only after the parse loop has
   already stored into input_ranges[n]; entries past MAX_STRINGOP_ALGS
   would be written before being rejected -- verify against the loop's
   elided increment of n.  */
3006 if (n > MAX_STRINGOP_ALGS)
3008 error ("too many size ranges specified in option %s",
3009 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3013 /* Now override the default algs array.  const_cast is needed because
   the default tables are declared const but are patched in place.  */
3014 for (i = 0; i < n; i++)
3016 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3017 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3018 = input_ranges[i].alg;
3019 *const_cast<int *>(&default_algs->size[i].noalign)
3020 = input_ranges[i].noalign;
3025 /* Parse the -mtune-ctrl= option: a comma-separated list of tuning
   feature names, each optionally prefixed with '^' to clear instead of
   set the feature.  When DUMP is true,
3026 print the features that are explicitly set. */
3029 parse_mtune_ctrl_str (bool dump)
3031 if (!ix86_tune_ctrl_string)
3034 char *next_feature_string = NULL;
/* Work on a writable copy; ORIG keeps the pointer for the elided free.  */
3035 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3036 char *orig = curr_feature_string;
3042 next_feature_string = strchr (curr_feature_string, ',');
3043 if (next_feature_string)
3044 *next_feature_string++ = '\0';
/* A leading '^' means "clear this feature" (clear flag set in an
   elided line).  */
3045 if (*curr_feature_string == '^')
3047 curr_feature_string++;
3050 for (i = 0; i < X86_TUNE_LAST; i++)
3052 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3054 ix86_tune_features[i] = !clear;
3056 fprintf (stderr, "Explicitly %s feature %s\n",
3057 clear ? "clear" : "set", ix86_tune_feature_names[i]);
/* No name matched; back up one char so the message shows the '^'.  */
3061 if (i == X86_TUNE_LAST)
3062 error ("Unknown parameter to option -mtune-ctrl: %s",
3063 clear ? curr_feature_string - 1 : curr_feature_string);
3064 curr_feature_string = next_feature_string;
3066 while (curr_feature_string);
3070 /* Helper function to set ix86_tune_features. IX86_TUNE is the
   selected tuning target; each feature defaults to whether its bit for
   that processor is set in initial_ix86_tune_features, then
   -mtune-ctrl= overrides are applied on top.  DUMP prints the result.  */
3074 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3076 unsigned int ix86_tune_mask = 1u << ix86_tune;
3079 for (i = 0; i < X86_TUNE_LAST; ++i)
/* With -mtune-ctrl and no defaults requested, start from all-off.  */
3081 if (ix86_tune_no_default)
3082 ix86_tune_features[i] = 0;
3084 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3089 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3090 for (i = 0; i < X86_TUNE_LAST; i++)
3091 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3092 ix86_tune_features[i] ? "on" : "off");
/* Apply explicit per-feature overrides from -mtune-ctrl=.  */
3095 parse_mtune_ctrl_str (dump);
3099 /* Override various settings based on options. If MAIN_ARGS_P, the
3100 options are from the command line, otherwise they are from
3104 ix86_option_override_internal (bool main_args_p,
3105 struct gcc_options *opts,
3106 struct gcc_options *opts_set)
3109 unsigned int ix86_arch_mask;
3110 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3115 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3116 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3117 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3118 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3119 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3120 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3121 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3122 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3123 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3124 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3125 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3126 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3127 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3128 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3129 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3130 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3131 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3132 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3133 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3134 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3135 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3136 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3137 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3138 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3139 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3140 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3141 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3142 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3143 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3144 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3145 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3146 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3147 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3148 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3149 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3150 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3151 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3152 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3153 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3154 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3155 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3156 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3157 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3158 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3159 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3160 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3161 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3162 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3163 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3164 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3165 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3166 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3167 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3168 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3169 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3170 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3171 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
3174 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3175 | PTA_CX16 | PTA_FXSR)
3176 #define PTA_NEHALEM \
3177 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3178 #define PTA_WESTMERE \
3179 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3180 #define PTA_SANDYBRIDGE \
3181 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3182 #define PTA_IVYBRIDGE \
3183 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3184 #define PTA_HASWELL \
3185 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3186 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3187 #define PTA_BROADWELL \
3188 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3190 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3191 #define PTA_BONNELL \
3192 (PTA_CORE2 | PTA_MOVBE)
3193 #define PTA_SILVERMONT \
3194 (PTA_WESTMERE | PTA_MOVBE)
3196 /* if this reaches 64, need to widen struct pta flags below */
3200 const char *const name; /* processor name or nickname. */
3201 const enum processor_type processor;
3202 const enum attr_cpu schedule;
3203 const unsigned HOST_WIDE_INT flags;
3205 const processor_alias_table[] =
3207 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3208 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3209 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3210 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3211 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3212 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3213 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3214 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3215 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3216 PTA_MMX | PTA_SSE | PTA_FXSR},
3217 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3218 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3219 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3220 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3221 PTA_MMX | PTA_SSE | PTA_FXSR},
3222 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3223 PTA_MMX | PTA_SSE | PTA_FXSR},
3224 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3225 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3226 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3227 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3228 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3229 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3230 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3231 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3232 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3233 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3234 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3235 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3236 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3237 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3238 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3239 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3241 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3243 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3245 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3247 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3248 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3249 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3250 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3251 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3252 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3253 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3254 {"knl", PROCESSOR_KNL, CPU_KNL, PTA_KNL},
3255 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3256 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3257 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3258 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3259 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3260 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3261 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3262 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3263 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3264 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3265 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3266 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3267 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3268 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3269 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3270 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3271 {"x86-64", PROCESSOR_K8, CPU_K8,
3272 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3273 {"k8", PROCESSOR_K8, CPU_K8,
3274 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3275 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3276 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3277 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3278 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3279 {"opteron", PROCESSOR_K8, CPU_K8,
3280 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3281 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3282 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3283 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3284 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3285 {"athlon64", PROCESSOR_K8, CPU_K8,
3286 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3287 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3288 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3289 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3290 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3291 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3292 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3293 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3294 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3295 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3296 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3297 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3298 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3299 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3300 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3301 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3302 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3303 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3304 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3305 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3306 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3307 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3308 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3309 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3310 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3311 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3312 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3313 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3314 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3315 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3316 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3317 | PTA_XSAVEOPT | PTA_FSGSBASE},
3318 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3319 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3320 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3321 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3322 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3323 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3324 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3326 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3327 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3328 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3329 | PTA_FXSR | PTA_XSAVE},
3330 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3331 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3332 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3333 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3334 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3335 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3337 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3339 | PTA_HLE /* flags are only used for -march switch. */ },
3342 /* -mrecip options. */
3345 const char *string; /* option name */
3346 unsigned int mask; /* mask bits to set */
3348 const recip_options[] =
3350 { "all", RECIP_MASK_ALL },
3351 { "none", RECIP_MASK_NONE },
3352 { "div", RECIP_MASK_DIV },
3353 { "sqrt", RECIP_MASK_SQRT },
3354 { "vec-div", RECIP_MASK_VEC_DIV },
3355 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3358 int const pta_size = ARRAY_SIZE (processor_alias_table);
3360 /* Set up prefix/suffix so the error messages refer to either the command
3361 line argument, or the attribute(target). */
3370 prefix = "option(\"";
3375 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3376 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3377 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3378 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3379 #ifdef TARGET_BI_ARCH
3382 #if TARGET_BI_ARCH == 1
3383 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3384 is on and OPTION_MASK_ABI_X32 is off. We turn off
3385 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3387 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3388 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3390 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3391 on and OPTION_MASK_ABI_64 is off. We turn off
3392 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3393 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3394 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3395 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3396 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3401 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3403 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3404 OPTION_MASK_ABI_64 for TARGET_X32. */
3405 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3406 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3408 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3409 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3410 | OPTION_MASK_ABI_X32
3411 | OPTION_MASK_ABI_64);
3412 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3414 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3415 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3416 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3417 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3420 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3421 SUBTARGET_OVERRIDE_OPTIONS;
3424 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3425 SUBSUBTARGET_OVERRIDE_OPTIONS;
3428 /* -fPIC is the default for x86_64. */
3429 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3430 opts->x_flag_pic = 2;
3432 /* Need to check -mtune=generic first. */
3433 if (opts->x_ix86_tune_string)
3435 /* As special support for cross compilers we read -mtune=native
3436 as -mtune=generic. With native compilers we won't see the
3437 -mtune=native, as it was changed by the driver. */
3438 if (!strcmp (opts->x_ix86_tune_string, "native"))
3440 opts->x_ix86_tune_string = "generic";
3442 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3443 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3444 "%stune=k8%s or %stune=generic%s instead as appropriate",
3445 prefix, suffix, prefix, suffix, prefix, suffix);
3449 if (opts->x_ix86_arch_string)
3450 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3451 if (!opts->x_ix86_tune_string)
3453 opts->x_ix86_tune_string
3454 = processor_target_table[TARGET_CPU_DEFAULT].name;
3455 ix86_tune_defaulted = 1;
3458 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3459 or defaulted. We need to use a sensible tune option. */
3460 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3462 opts->x_ix86_tune_string = "generic";
3466 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3467 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3469 /* rep; movq isn't available in 32-bit code. */
3470 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3471 opts->x_ix86_stringop_alg = no_stringop;
3474 if (!opts->x_ix86_arch_string)
3475 opts->x_ix86_arch_string
3476 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3477 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3479 ix86_arch_specified = 1;
3481 if (opts_set->x_ix86_pmode)
3483 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3484 && opts->x_ix86_pmode == PMODE_SI)
3485 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3486 && opts->x_ix86_pmode == PMODE_DI))
3487 error ("address mode %qs not supported in the %s bit mode",
3488 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3489 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3492 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3493 ? PMODE_DI : PMODE_SI;
3495 if (!opts_set->x_ix86_abi)
3496 opts->x_ix86_abi = DEFAULT_ABI;
3498 /* For targets using ms ABI enable ms-extensions, if not
3499 explicit turned off. For non-ms ABI we turn off this
3501 if (!opts_set->x_flag_ms_extensions)
3502 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3504 if (opts_set->x_ix86_cmodel)
3506 switch (opts->x_ix86_cmodel)
3510 if (opts->x_flag_pic)
3511 opts->x_ix86_cmodel = CM_SMALL_PIC;
3512 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3513 error ("code model %qs not supported in the %s bit mode",
3519 if (opts->x_flag_pic)
3520 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3521 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3522 error ("code model %qs not supported in the %s bit mode",
3524 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3525 error ("code model %qs not supported in x32 mode",
3531 if (opts->x_flag_pic)
3532 opts->x_ix86_cmodel = CM_LARGE_PIC;
3533 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3534 error ("code model %qs not supported in the %s bit mode",
3536 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3537 error ("code model %qs not supported in x32 mode",
3542 if (opts->x_flag_pic)
3543 error ("code model %s does not support PIC mode", "32");
3544 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3545 error ("code model %qs not supported in the %s bit mode",
3550 if (opts->x_flag_pic)
3552 error ("code model %s does not support PIC mode", "kernel");
3553 opts->x_ix86_cmodel = CM_32;
3555 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3556 error ("code model %qs not supported in the %s bit mode",
3566 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3567 use of rip-relative addressing. This eliminates fixups that
3568 would otherwise be needed if this object is to be placed in a
3569 DLL, and is essentially just as efficient as direct addressing. */
3570 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3571 && (TARGET_RDOS || TARGET_PECOFF))
3572 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3573 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3574 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3576 opts->x_ix86_cmodel = CM_32;
3578 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3580 error ("-masm=intel not supported in this configuration");
3581 opts->x_ix86_asm_dialect = ASM_ATT;
3583 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3584 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3585 sorry ("%i-bit mode not compiled in",
3586 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3588 for (i = 0; i < pta_size; i++)
3589 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3591 ix86_schedule = processor_alias_table[i].schedule;
3592 ix86_arch = processor_alias_table[i].processor;
3593 /* Default cpu tuning to the architecture. */
3594 ix86_tune = ix86_arch;
3596 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3597 && !(processor_alias_table[i].flags & PTA_64BIT))
3598 error ("CPU you selected does not support x86-64 "
3601 if (processor_alias_table[i].flags & PTA_MMX
3602 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3603 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3604 if (processor_alias_table[i].flags & PTA_3DNOW
3605 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3606 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3607 if (processor_alias_table[i].flags & PTA_3DNOW_A
3608 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3609 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3610 if (processor_alias_table[i].flags & PTA_SSE
3611 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3612 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3613 if (processor_alias_table[i].flags & PTA_SSE2
3614 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3615 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3616 if (processor_alias_table[i].flags & PTA_SSE3
3617 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3618 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3619 if (processor_alias_table[i].flags & PTA_SSSE3
3620 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3621 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3622 if (processor_alias_table[i].flags & PTA_SSE4_1
3623 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3624 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3625 if (processor_alias_table[i].flags & PTA_SSE4_2
3626 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3627 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3628 if (processor_alias_table[i].flags & PTA_AVX
3629 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3630 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3631 if (processor_alias_table[i].flags & PTA_AVX2
3632 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3633 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3634 if (processor_alias_table[i].flags & PTA_FMA
3635 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3636 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3637 if (processor_alias_table[i].flags & PTA_SSE4A
3638 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3639 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3640 if (processor_alias_table[i].flags & PTA_FMA4
3641 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3642 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3643 if (processor_alias_table[i].flags & PTA_XOP
3644 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3645 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3646 if (processor_alias_table[i].flags & PTA_LWP
3647 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3648 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3649 if (processor_alias_table[i].flags & PTA_ABM
3650 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3651 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3652 if (processor_alias_table[i].flags & PTA_BMI
3653 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3654 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3655 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3656 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3657 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3658 if (processor_alias_table[i].flags & PTA_TBM
3659 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3660 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3661 if (processor_alias_table[i].flags & PTA_BMI2
3662 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3663 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3664 if (processor_alias_table[i].flags & PTA_CX16
3665 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3666 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3667 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3668 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3669 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3670 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3671 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3672 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3673 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3674 if (processor_alias_table[i].flags & PTA_MOVBE
3675 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3676 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3677 if (processor_alias_table[i].flags & PTA_AES
3678 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3679 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3680 if (processor_alias_table[i].flags & PTA_SHA
3681 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3682 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3683 if (processor_alias_table[i].flags & PTA_PCLMUL
3684 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3685 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3686 if (processor_alias_table[i].flags & PTA_FSGSBASE
3687 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3688 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3689 if (processor_alias_table[i].flags & PTA_RDRND
3690 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3691 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3692 if (processor_alias_table[i].flags & PTA_F16C
3693 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3694 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3695 if (processor_alias_table[i].flags & PTA_RTM
3696 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3697 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3698 if (processor_alias_table[i].flags & PTA_HLE
3699 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3700 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3701 if (processor_alias_table[i].flags & PTA_PRFCHW
3702 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3703 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3704 if (processor_alias_table[i].flags & PTA_RDSEED
3705 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3706 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3707 if (processor_alias_table[i].flags & PTA_ADX
3708 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3709 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3710 if (processor_alias_table[i].flags & PTA_FXSR
3711 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3712 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3713 if (processor_alias_table[i].flags & PTA_XSAVE
3714 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3715 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3716 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3717 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3718 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3719 if (processor_alias_table[i].flags & PTA_AVX512F
3720 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3721 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3722 if (processor_alias_table[i].flags & PTA_AVX512ER
3723 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3724 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3725 if (processor_alias_table[i].flags & PTA_AVX512PF
3726 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3727 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3728 if (processor_alias_table[i].flags & PTA_AVX512CD
3729 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3730 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3731 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3732 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3733 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3734 if (processor_alias_table[i].flags & PTA_PCOMMIT
3735 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
3736 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
3737 if (processor_alias_table[i].flags & PTA_CLWB
3738 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3739 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3740 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3741 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3742 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3743 if (processor_alias_table[i].flags & PTA_XSAVEC
3744 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3745 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3746 if (processor_alias_table[i].flags & PTA_XSAVES
3747 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3748 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3749 if (processor_alias_table[i].flags & PTA_AVX512DQ
3750 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3751 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3752 if (processor_alias_table[i].flags & PTA_AVX512BW
3753 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3754 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3755 if (processor_alias_table[i].flags & PTA_AVX512VL
3756 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3757 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3758 if (processor_alias_table[i].flags & PTA_MPX
3759 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3760 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3761 if (processor_alias_table[i].flags & PTA_AVX512VBMI
3762 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3763 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3764 if (processor_alias_table[i].flags & PTA_AVX512IFMA
3765 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3766 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3767 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3768 x86_prefetch_sse = true;
3773 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3774 error ("Intel MPX does not support x32");
3776 if (TARGET_X32 && (ix86_isa_flags & OPTION_MASK_ISA_MPX))
3777 error ("Intel MPX does not support x32");
3779 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3780 error ("generic CPU can be used only for %stune=%s %s",
3781 prefix, suffix, sw);
3782 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3783 error ("intel CPU can be used only for %stune=%s %s",
3784 prefix, suffix, sw);
3785 else if (i == pta_size)
3786 error ("bad value (%s) for %sarch=%s %s",
3787 opts->x_ix86_arch_string, prefix, suffix, sw);
3789 ix86_arch_mask = 1u << ix86_arch;
3790 for (i = 0; i < X86_ARCH_LAST; ++i)
3791 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3793 for (i = 0; i < pta_size; i++)
3794 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3796 ix86_schedule = processor_alias_table[i].schedule;
3797 ix86_tune = processor_alias_table[i].processor;
3798 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3800 if (!(processor_alias_table[i].flags & PTA_64BIT))
3802 if (ix86_tune_defaulted)
3804 opts->x_ix86_tune_string = "x86-64";
3805 for (i = 0; i < pta_size; i++)
3806 if (! strcmp (opts->x_ix86_tune_string,
3807 processor_alias_table[i].name))
3809 ix86_schedule = processor_alias_table[i].schedule;
3810 ix86_tune = processor_alias_table[i].processor;
3813 error ("CPU you selected does not support x86-64 "
3817 /* Intel CPUs have always interpreted SSE prefetch instructions as
3818 NOPs; so, we can enable SSE prefetch instructions even when
3819 -mtune (rather than -march) points us to a processor that has them.
3820 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3821 higher processors. */
3823 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3824 x86_prefetch_sse = true;
3828 if (ix86_tune_specified && i == pta_size)
3829 error ("bad value (%s) for %stune=%s %s",
3830 opts->x_ix86_tune_string, prefix, suffix, sw);
3832 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3834 #ifndef USE_IX86_FRAME_POINTER
3835 #define USE_IX86_FRAME_POINTER 0
3838 #ifndef USE_X86_64_FRAME_POINTER
3839 #define USE_X86_64_FRAME_POINTER 0
3842 /* Set the default values for switches whose default depends on TARGET_64BIT
3843 in case they weren't overwritten by command line options. */
3844 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3846 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3847 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3848 if (opts->x_flag_asynchronous_unwind_tables
3849 && !opts_set->x_flag_unwind_tables
3850 && TARGET_64BIT_MS_ABI)
3851 opts->x_flag_unwind_tables = 1;
3852 if (opts->x_flag_asynchronous_unwind_tables == 2)
3853 opts->x_flag_unwind_tables
3854 = opts->x_flag_asynchronous_unwind_tables = 1;
3855 if (opts->x_flag_pcc_struct_return == 2)
3856 opts->x_flag_pcc_struct_return = 0;
3860 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3861 opts->x_flag_omit_frame_pointer
3862 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3863 if (opts->x_flag_asynchronous_unwind_tables == 2)
3864 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3865 if (opts->x_flag_pcc_struct_return == 2)
3866 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3869 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3870 if (opts->x_optimize_size)
3871 ix86_cost = &ix86_size_cost;
3873 ix86_cost = ix86_tune_cost;
3875 /* Arrange to set up i386_stack_locals for all functions. */
3876 init_machine_status = ix86_init_machine_status;
3878 /* Validate -mregparm= value. */
3879 if (opts_set->x_ix86_regparm)
3881 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3882 warning (0, "-mregparm is ignored in 64-bit mode");
3883 if (opts->x_ix86_regparm > REGPARM_MAX)
3885 error ("-mregparm=%d is not between 0 and %d",
3886 opts->x_ix86_regparm, REGPARM_MAX);
3887 opts->x_ix86_regparm = 0;
3890 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3891 opts->x_ix86_regparm = REGPARM_MAX;
3893 /* Default align_* from the processor table. */
3894 if (opts->x_align_loops == 0)
3896 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3897 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3899 if (opts->x_align_jumps == 0)
3901 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3902 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3904 if (opts->x_align_functions == 0)
3906 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3909 /* Provide default for -mbranch-cost= value. */
3910 if (!opts_set->x_ix86_branch_cost)
3911 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3913 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3915 opts->x_target_flags
3916 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3918 /* Enable by default the SSE and MMX builtins. Do allow the user to
3919 explicitly disable any of these. In particular, disabling SSE and
3920 MMX for kernel code is extremely useful. */
3921 if (!ix86_arch_specified)
3922 opts->x_ix86_isa_flags
3923 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3924 | TARGET_SUBTARGET64_ISA_DEFAULT)
3925 & ~opts->x_ix86_isa_flags_explicit);
3927 if (TARGET_RTD_P (opts->x_target_flags))
3928 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3932 opts->x_target_flags
3933 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3935 if (!ix86_arch_specified)
3936 opts->x_ix86_isa_flags
3937 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3939 /* i386 ABI does not specify red zone. It still makes sense to use it
3940 when programmer takes care to stack from being destroyed. */
3941 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3942 opts->x_target_flags |= MASK_NO_RED_ZONE;
3945 /* Keep nonleaf frame pointers. */
3946 if (opts->x_flag_omit_frame_pointer)
3947 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3948 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3949 opts->x_flag_omit_frame_pointer = 1;
3951 /* If we're doing fast math, we don't care about comparison order
3952 wrt NaNs. This lets us use a shorter comparison sequence. */
3953 if (opts->x_flag_finite_math_only)
3954 opts->x_target_flags &= ~MASK_IEEE_FP;
3956 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3957 since the insns won't need emulation. */
3958 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3959 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3961 /* Likewise, if the target doesn't have a 387, or we've specified
3962 software floating point, don't use 387 inline intrinsics. */
3963 if (!TARGET_80387_P (opts->x_target_flags))
3964 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3966 /* Turn on MMX builtins for -msse. */
3967 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3968 opts->x_ix86_isa_flags
3969 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3971 /* Enable SSE prefetch. */
3972 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3973 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3974 x86_prefetch_sse = true;
3976 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3977 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3978 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3979 opts->x_ix86_isa_flags
3980 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3982 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3983 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3984 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3985 opts->x_ix86_isa_flags
3986 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3988 /* Enable lzcnt instruction for -mabm. */
3989 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3990 opts->x_ix86_isa_flags
3991 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3993 /* Validate -mpreferred-stack-boundary= value or default it to
3994 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3995 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3996 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3998 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3999 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
4000 int max = (TARGET_SEH ? 4 : 12);
4002 if (opts->x_ix86_preferred_stack_boundary_arg < min
4003 || opts->x_ix86_preferred_stack_boundary_arg > max)
4006 error ("-mpreferred-stack-boundary is not supported "
4009 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4010 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4013 ix86_preferred_stack_boundary
4014 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
4017 /* Set the default value for -mstackrealign. */
4018 if (opts->x_ix86_force_align_arg_pointer == -1)
4019 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4021 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4023 /* Validate -mincoming-stack-boundary= value or default it to
4024 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4025 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4026 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4028 if (opts->x_ix86_incoming_stack_boundary_arg
4029 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
4030 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4031 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4032 opts->x_ix86_incoming_stack_boundary_arg,
4033 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
4036 ix86_user_incoming_stack_boundary
4037 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4038 ix86_incoming_stack_boundary
4039 = ix86_user_incoming_stack_boundary;
4043 #ifndef NO_PROFILE_COUNTERS
4044 if (flag_nop_mcount)
4045 error ("-mnop-mcount is not compatible with this target");
4047 if (flag_nop_mcount && flag_pic)
4048 error ("-mnop-mcount is not implemented for -fPIC");
4050 /* Accept -msseregparm only if at least SSE support is enabled. */
4051 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4052 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4053 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4055 if (opts_set->x_ix86_fpmath)
4057 if (opts->x_ix86_fpmath & FPMATH_SSE)
4059 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4061 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4062 opts->x_ix86_fpmath = FPMATH_387;
4064 else if ((opts->x_ix86_fpmath & FPMATH_387)
4065 && !TARGET_80387_P (opts->x_target_flags))
4067 warning (0, "387 instruction set disabled, using SSE arithmetics");
4068 opts->x_ix86_fpmath = FPMATH_SSE;
4072 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4073 fpmath=387. The second is however default at many targets since the
4074 extra 80bit precision of temporaries is considered to be part of ABI.
4075 Overwrite the default at least for -ffast-math.
4076 TODO: -mfpmath=both seems to produce same performing code with bit
4077 smaller binaries. It is however not clear if register allocation is
4078 ready for this setting.
4079 Also -mfpmath=387 is overall a lot more compact (bout 4-5%) than SSE
4080 codegen. We may switch to 387 with -ffast-math for size optimized
4082 else if (fast_math_flags_set_p (&global_options)
4083 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4084 opts->x_ix86_fpmath = FPMATH_SSE;
4086 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4088 /* If the i387 is disabled, then do not return values in it. */
4089 if (!TARGET_80387_P (opts->x_target_flags))
4090 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4092 /* Use external vectorized library in vectorizing intrinsics. */
4093 if (opts_set->x_ix86_veclibabi_type)
4094 switch (opts->x_ix86_veclibabi_type)
4096 case ix86_veclibabi_type_svml:
4097 ix86_veclib_handler = ix86_veclibabi_svml;
4100 case ix86_veclibabi_type_acml:
4101 ix86_veclib_handler = ix86_veclibabi_acml;
4108 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4109 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4110 && !opts->x_optimize_size)
4111 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4113 /* If stack probes are required, the space used for large function
4114 arguments on the stack must also be probed, so enable
4115 -maccumulate-outgoing-args so this happens in the prologue. */
4116 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4117 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4119 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4120 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4121 "for correctness", prefix, suffix);
4122 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4125 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4128 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4129 p = strchr (internal_label_prefix, 'X');
4130 internal_label_prefix_len = p - internal_label_prefix;
4134 /* When scheduling description is not available, disable scheduler pass
4135 so it won't slow down the compilation and make x87 code slower. */
4136 if (!TARGET_SCHEDULE)
4137 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4139 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4140 ix86_tune_cost->simultaneous_prefetches,
4141 opts->x_param_values,
4142 opts_set->x_param_values);
4143 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4144 ix86_tune_cost->prefetch_block,
4145 opts->x_param_values,
4146 opts_set->x_param_values);
4147 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4148 ix86_tune_cost->l1_cache_size,
4149 opts->x_param_values,
4150 opts_set->x_param_values);
4151 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4152 ix86_tune_cost->l2_cache_size,
4153 opts->x_param_values,
4154 opts_set->x_param_values);
4156 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
4157 if (opts->x_flag_prefetch_loop_arrays < 0
4159 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4160 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4161 opts->x_flag_prefetch_loop_arrays = 1;
4163 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4164 can be opts->x_optimized to ap = __builtin_next_arg (0). */
4165 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4166 targetm.expand_builtin_va_start = NULL;
4168 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4170 ix86_gen_leave = gen_leave_rex64;
4171 if (Pmode == DImode)
4173 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4174 ix86_gen_tls_local_dynamic_base_64
4175 = gen_tls_local_dynamic_base_64_di;
4179 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4180 ix86_gen_tls_local_dynamic_base_64
4181 = gen_tls_local_dynamic_base_64_si;
4185 ix86_gen_leave = gen_leave;
4187 if (Pmode == DImode)
4189 ix86_gen_add3 = gen_adddi3;
4190 ix86_gen_sub3 = gen_subdi3;
4191 ix86_gen_sub3_carry = gen_subdi3_carry;
4192 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4193 ix86_gen_andsp = gen_anddi3;
4194 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4195 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4196 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4197 ix86_gen_monitor = gen_sse3_monitor_di;
4201 ix86_gen_add3 = gen_addsi3;
4202 ix86_gen_sub3 = gen_subsi3;
4203 ix86_gen_sub3_carry = gen_subsi3_carry;
4204 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4205 ix86_gen_andsp = gen_andsi3;
4206 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4207 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4208 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4209 ix86_gen_monitor = gen_sse3_monitor_si;
4213 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4214 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4215 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4218 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4220 if (opts->x_flag_fentry > 0)
4221 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4223 opts->x_flag_fentry = 0;
4225 else if (TARGET_SEH)
4227 if (opts->x_flag_fentry == 0)
4228 sorry ("-mno-fentry isn%'t compatible with SEH");
4229 opts->x_flag_fentry = 1;
4231 else if (opts->x_flag_fentry < 0)
4233 #if defined(PROFILE_BEFORE_PROLOGUE)
4234 opts->x_flag_fentry = 1;
4236 opts->x_flag_fentry = 0;
4240 /* When not opts->x_optimize for size, enable vzeroupper optimization for
4241 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4242 AVX unaligned load/store. */
4243 if (!opts->x_optimize_size)
4245 if (flag_expensive_optimizations
4246 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4247 opts->x_target_flags |= MASK_VZEROUPPER;
4248 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4249 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4250 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4251 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4252 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4253 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4254 /* Enable 128-bit AVX instruction generation
4255 for the auto-vectorizer. */
4256 if (TARGET_AVX128_OPTIMAL
4257 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4258 opts->x_target_flags |= MASK_PREFER_AVX128;
4261 if (opts->x_ix86_recip_name)
4263 char *p = ASTRDUP (opts->x_ix86_recip_name);
4265 unsigned int mask, i;
4268 while ((q = strtok (p, ",")) != NULL)
4279 if (!strcmp (q, "default"))
4280 mask = RECIP_MASK_ALL;
4283 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4284 if (!strcmp (q, recip_options[i].string))
4286 mask = recip_options[i].mask;
4290 if (i == ARRAY_SIZE (recip_options))
4292 error ("unknown option for -mrecip=%s", q);
4294 mask = RECIP_MASK_NONE;
4298 opts->x_recip_mask_explicit |= mask;
4300 opts->x_recip_mask &= ~mask;
4302 opts->x_recip_mask |= mask;
4306 if (TARGET_RECIP_P (opts->x_target_flags))
4307 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4308 else if (opts_set->x_target_flags & MASK_RECIP)
4309 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4311 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4312 for 64-bit Bionic. */
4313 if (TARGET_HAS_BIONIC
4314 && !(opts_set->x_target_flags
4315 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4316 opts->x_target_flags |= (TARGET_64BIT
4317 ? MASK_LONG_DOUBLE_128
4318 : MASK_LONG_DOUBLE_64);
4320 /* Only one of them can be active. */
4321 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4322 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4324 /* Save the initial options in case the user does function specific
4327 target_option_default_node = target_option_current_node
4328 = build_target_option_node (opts);
4330 /* Handle stack protector */
4331 if (!opts_set->x_ix86_stack_protector_guard)
4332 opts->x_ix86_stack_protector_guard
4333 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4335 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4336 if (opts->x_ix86_tune_memcpy_strategy)
4338 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4339 ix86_parse_stringop_strategy_string (str, false);
4343 if (opts->x_ix86_tune_memset_strategy)
4345 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4346 ix86_parse_stringop_strategy_string (str, true);
4351 /* Implement the TARGET_OPTION_OVERRIDE hook. */
/* Top-level option-override entry point.  Builds a register_pass_info
   describing the vzeroupper-insertion pass (instance 1, inserted after
   the "reload" pass), delegates all option processing to
   ix86_option_override_internal on the global option sets, and finally
   registers the pass with the pass manager.
   NOTE(review): this is a sampled listing -- the function's return-type
   line, braces and a few interior lines are not visible here; confirm
   against the full source before editing.  */
4354 ix86_option_override (void)
4356 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4357 struct register_pass_info insert_vzeroupper_info
4358 = { pass_insert_vzeroupper, "reload",
4359 1, PASS_POS_INSERT_AFTER
4362 ix86_option_override_internal (true, &global_options, &global_options_set);
4365 /* This needs to be done at start up. It's convenient to do it here. */
4366 register_pass (&insert_vzeroupper_info);
4369 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
/* Returns a freshly xstrdup'ed option string telling the offload
   compiler which ABI to target: "-foffload-abi=lp64" or
   "-foffload-abi=ilp32".  The caller owns (and must free) the string.
   NOTE(review): the conditional selecting between the two returns
   (presumably a TARGET_LP64 test) is not visible in this sampled
   listing -- confirm against the full source.  */
4371 ix86_offload_options (void)
4374 return xstrdup ("-foffload-abi=lp64");
4375 return xstrdup ("-foffload-abi=ilp32");
4378 /* Update register usage after having seen the compiler flags. */
/* Adjusts fixed_regs[], call_used_regs[] and reg_names[] to reflect the
   final target flags: fixes the PIC register, squashes (marks fixed +
   call-used, blanks the name of) register classes the selected ISA does
   not provide (REX, MMX, SSE, x87, AVX-512, mask, MPX bound registers),
   and recomputes the CLOBBERED_REGS class from the call-used subset of
   GENERAL_REGS.
   NOTE(review): sampled listing -- the variable declarations and several
   guarding conditionals (e.g. the !TARGET_64BIT guard around the REX
   squashing, and the ISA guards before the MMX/SSE/MPX loops) are not
   visible here; do not assume the loops below are unconditional.  */
4381 ix86_conditional_register_usage (void)
4386 /* The PIC register, if it exists, is fixed. */
4387 j = PIC_OFFSET_TABLE_REGNUM;
4388 if (j != INVALID_REGNUM)
4389 fixed_regs[j] = call_used_regs[j] = 1;
4391 /* For 32-bit targets, squash the REX registers. */
/* Squashing = fixed + call-used + empty assembler name, so the register
   allocator and output machinery never touch these registers.  */
4394 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4395 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4396 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4397 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4398 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4399 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4402 /* See the definition of CALL_USED_REGISTERS in i386.h. */
/* c_mask selects which conditional CALL_USED_REGISTERS bit applies for
   the current ABI (64-bit MS ABI, generic 64-bit, or 32-bit).  */
4403 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4404 : TARGET_64BIT ? (1 << 2)
4407 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4409 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4411 /* Set/reset conditionally defined registers from
4412 CALL_USED_REGISTERS initializer. */
/* Values > 1 in the initializer encode per-ABI bits; collapse them to
   0/1 using c_mask.  */
4413 if (call_used_regs[i] > 1)
4414 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4416 /* Calculate registers of CLOBBERED_REGS register set
4417 as call used registers from GENERAL_REGS register set. */
4418 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4419 && call_used_regs[i])
4420 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4423 /* If MMX is disabled, squash the registers. */
4425 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4426 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4427 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4429 /* If SSE is disabled, squash the registers. */
4431 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4432 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4433 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4435 /* If the FPU is disabled, squash the registers. */
4436 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4437 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4438 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4439 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4441 /* If AVX512F is disabled, squash the registers. */
/* Without AVX-512F neither the upper 16 SSE registers (xmm16-xmm31)
   nor the mask registers (k0-k7) exist.  */
4442 if (! TARGET_AVX512F)
4444 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4445 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4447 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4448 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4451 /* If MPX is disabled, squash the registers. */
4453 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4454 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4458 /* Save the current options */
/* Snapshot target-specific option state into PTR so it can later be
   restored by ix86_function_specific_restore (used to implement
   attribute((target(...))) and #pragma GCC target).  Most fields are
   copied from OPTS; arch/tune and their defaulted/specified flags are
   taken from the ix86_* file-scope globals.  */
4461 ix86_function_specific_save (struct cl_target_option *ptr,
4462 struct gcc_options *opts)
4464 ptr->arch = ix86_arch;
4465 ptr->schedule = ix86_schedule;
4466 ptr->tune = ix86_tune;
4467 ptr->branch_cost = ix86_branch_cost;
4468 ptr->tune_defaulted = ix86_tune_defaulted;
4469 ptr->arch_specified = ix86_arch_specified;
4470 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4471 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4472 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4473 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4474 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4475 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4476 ptr->x_ix86_abi = opts->x_ix86_abi;
4477 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4478 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4479 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4480 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4481 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4482 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4483 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4484 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4485 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4486 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4487 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4488 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4489 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4490 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4491 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4492 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4493 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4494 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4495 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4496 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4498 /* The fields are char but the variables are not; make sure the
4499 values fit in the fields.  The asserts catch any silent narrowing
   of the values assigned above.  */
4500 gcc_assert (ptr->arch == ix86_arch);
4501 gcc_assert (ptr->schedule == ix86_schedule);
4502 gcc_assert (ptr->tune == ix86_tune);
4503 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4506 /* Restore the current options */
/* Inverse of ix86_function_specific_save: copy the state saved in PTR
   back into OPTS and into the ix86_* globals, then regenerate any
   derived feature tables whose inputs (arch/tune) changed.  */
4509 ix86_function_specific_restore (struct gcc_options *opts,
4510 struct cl_target_option *ptr)
4512 enum processor_type old_tune = ix86_tune;
4513 enum processor_type old_arch = ix86_arch;
4514 unsigned int ix86_arch_mask;
4517 /* We don't change -fPIC. */
4518 opts->x_flag_pic = flag_pic;
4520 ix86_arch = (enum processor_type) ptr->arch;
4521 ix86_schedule = (enum attr_cpu) ptr->schedule;
4522 ix86_tune = (enum processor_type) ptr->tune;
4523 opts->x_ix86_branch_cost = ptr->branch_cost;
4524 ix86_tune_defaulted = ptr->tune_defaulted;
4525 ix86_arch_specified = ptr->arch_specified;
4526 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4527 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4528 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4529 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4530 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4531 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4532 opts->x_ix86_abi = ptr->x_ix86_abi;
4533 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4534 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4535 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4536 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4537 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4538 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4539 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4540 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4541 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4542 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4543 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4544 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4545 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4546 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4547 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4548 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4549 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4550 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4551 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4552 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4554 /* Recreate the arch feature tests if the arch changed */
4555 if (old_arch != ix86_arch)
4557 ix86_arch_mask = 1u << ix86_arch;
4558 for (i = 0; i < X86_ARCH_LAST; ++i)
4559 ix86_arch_features[i]
4560 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4563 /* Recreate the tune optimization tests */
4564 if (old_tune != ix86_tune)
4565 set_ix86_tune_features (ix86_tune, false);
4568 /* Print the current options */
/* Dump the target options held in PTR to FILE, indented by INDENT
   columns, for -fdump / debugging of attribute((target)) state.
   The ISA/target-flag string is heap-allocated by ix86_target_string
   and freed here before returning.  */
4571 ix86_function_specific_print (FILE *file, int indent,
4572 struct cl_target_option *ptr)
4575 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4576 NULL, NULL, ptr->x_ix86_fpmath, false);
4578 gcc_assert (ptr->arch < PROCESSOR_max);
4579 fprintf (file, "%*sarch = %d (%s)\n",
4581 ptr->arch, processor_target_table[ptr->arch].name);
4583 gcc_assert (ptr->tune < PROCESSOR_max);
4584 fprintf (file, "%*stune = %d (%s)\n",
4586 ptr->tune, processor_target_table[ptr->tune].name);
4588 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4592 fprintf (file, "%*s%s\n", indent, "", target_string);
4593 free (target_string);
4598 /* Inner function to process the attribute((target(...))), take an argument and
4599 set the current options from the argument. If we have a list, recursively go
/* ARGS is either a TREE_LIST of attribute arguments (handled by
   recursion) or a STRING_CST holding comma-separated option names.
   Recognized options are applied to OPTS/OPTS_SET; string-valued
   options (arch=/tune=) are stashed in P_STRINGS for the caller;
   enum-valued options are recorded in ENUM_OPTS_SET.  Returns
   false (via the elided return paths) on any unrecognized or
   malformed option, issuing an error.  */
4603 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4604 struct gcc_options *opts,
4605 struct gcc_options *opts_set,
4606 struct gcc_options *enum_opts_set)
/* Each table entry records the option name, its length, how the
   option is applied (isa / string / enum / yes / no), the OPT_m*
   option index, and a target-flag mask for the yes/no kinds.  */
4611 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4612 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4613 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4614 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4615 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4631 enum ix86_opt_type type;
4636 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4637 IX86_ATTR_ISA ("abm", OPT_mabm),
4638 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4639 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4640 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4641 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4642 IX86_ATTR_ISA ("aes", OPT_maes),
4643 IX86_ATTR_ISA ("sha", OPT_msha),
4644 IX86_ATTR_ISA ("avx", OPT_mavx),
4645 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4646 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4647 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4648 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4649 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4650 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4651 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4652 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4653 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4654 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4655 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4656 IX86_ATTR_ISA ("sse", OPT_msse),
4657 IX86_ATTR_ISA ("sse2", OPT_msse2),
4658 IX86_ATTR_ISA ("sse3", OPT_msse3),
4659 IX86_ATTR_ISA ("sse4", OPT_msse4),
4660 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4661 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4662 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4663 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4664 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4665 IX86_ATTR_ISA ("fma", OPT_mfma),
4666 IX86_ATTR_ISA ("xop", OPT_mxop),
4667 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4668 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4669 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4670 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4671 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4672 IX86_ATTR_ISA ("hle", OPT_mhle),
4673 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4674 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4675 IX86_ATTR_ISA ("adx", OPT_madx),
4676 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4677 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4678 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4679 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4680 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4681 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4682 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4683 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
4684 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
4685 IX86_ATTR_ISA ("clwb", OPT_mclwb),
4686 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
4689 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4691 /* string options */
4692 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4693 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4696 IX86_ATTR_YES ("cld",
4700 IX86_ATTR_NO ("fancy-math-387",
4701 OPT_mfancy_math_387,
4702 MASK_NO_FANCY_MATH_387),
4704 IX86_ATTR_YES ("ieee-fp",
4708 IX86_ATTR_YES ("inline-all-stringops",
4709 OPT_minline_all_stringops,
4710 MASK_INLINE_ALL_STRINGOPS),
4712 IX86_ATTR_YES ("inline-stringops-dynamically",
4713 OPT_minline_stringops_dynamically,
4714 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4716 IX86_ATTR_NO ("align-stringops",
4717 OPT_mno_align_stringops,
4718 MASK_NO_ALIGN_STRINGOPS),
4720 IX86_ATTR_YES ("recip",
4726 /* If this is a list, recurse to get the options. */
4727 if (TREE_CODE (args) == TREE_LIST)
4731 for (; args; args = TREE_CHAIN (args))
4732 if (TREE_VALUE (args)
4733 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4734 p_strings, opts, opts_set,
4741 else if (TREE_CODE (args) != STRING_CST)
4743 error ("attribute %<target%> argument not a string");
4747 /* Handle multiple arguments separated by commas. */
/* ASTRDUP makes an obstack copy, so the STRING_CST itself is never
   modified while we tokenize it.  */
4748 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4750 while (next_optstr && *next_optstr != '\0')
4752 char *p = next_optstr;
4754 char *comma = strchr (next_optstr, ',');
4755 const char *opt_string;
4756 size_t len, opt_len;
4761 enum ix86_opt_type type = ix86_opt_unknown;
4767 len = comma - next_optstr;
4768 next_optstr = comma + 1;
4776 /* Recognize no-xxx. */
4777 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4786 /* Find the option. */
4789 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4791 type = attrs[i].type;
4792 opt_len = attrs[i].len;
4793 if (ch == attrs[i].string[0]
4794 && ((type != ix86_opt_str && type != ix86_opt_enum)
4797 && memcmp (p, attrs[i].string, opt_len) == 0)
4800 mask = attrs[i].mask;
4801 opt_string = attrs[i].string;
4806 /* Process the option. */
4809 error ("attribute(target(\"%s\")) is unknown", orig_p);
4813 else if (type == ix86_opt_isa)
4815 struct cl_decoded_option decoded;
/* Route ISA options through the regular option machinery so that
   implied ISAs are enabled/disabled consistently with -m flags.  */
4817 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4818 ix86_handle_option (opts, opts_set,
4819 &decoded, input_location);
4822 else if (type == ix86_opt_yes || type == ix86_opt_no)
4824 if (type == ix86_opt_no)
4825 opt_set_p = !opt_set_p;
4828 opts->x_target_flags |= mask;
4830 opts->x_target_flags &= ~mask;
4833 else if (type == ix86_opt_str)
4837 error ("option(\"%s\") was already specified", opt_string);
/* The string value is duplicated into P_STRINGS; the caller owns
   and frees it.  */
4841 p_strings[opt] = xstrdup (p + opt_len);
4844 else if (type == ix86_opt_enum)
4849 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4851 set_option (opts, enum_opts_set, opt, value,
4852 p + opt_len, DK_UNSPECIFIED, input_location,
4856 error ("attribute(target(\"%s\")) is unknown", orig_p);
4868 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
/* ARGS is the attribute((target(...))) argument list.  Parses it into
   OPTS/OPTS_SET, reruns the option override machinery when anything
   differs from the defaults, and builds a target_option node capturing
   the result.  Returns error_mark_node if parsing failed.  The original
   arch/tune strings and fpmath setting are restored before returning so
   the caller's global option state is not permanently disturbed.  */
4871 ix86_valid_target_attribute_tree (tree args,
4872 struct gcc_options *opts,
4873 struct gcc_options *opts_set)
4875 const char *orig_arch_string = opts->x_ix86_arch_string;
4876 const char *orig_tune_string = opts->x_ix86_tune_string;
4877 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4878 int orig_tune_defaulted = ix86_tune_defaulted;
4879 int orig_arch_specified = ix86_arch_specified;
4880 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4883 struct cl_target_option *def
4884 = TREE_TARGET_OPTION (target_option_default_node);
4885 struct gcc_options enum_opts_set;
4887 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4889 /* Process each of the options on the chain. */
4890 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4891 opts_set, &enum_opts_set))
4892 return error_mark_node;
4894 /* If the changed options are different from the default, rerun
4895 ix86_option_override_internal, and then save the options away.
4896 The string options are are attribute options, and will be undone
4897 when we copy the save structure. */
4898 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4899 || opts->x_target_flags != def->x_target_flags
4900 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4901 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4902 || enum_opts_set.x_ix86_fpmath)
4904 /* If we are using the default tune= or arch=, undo the string assigned,
4905 and use the default. */
4906 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4907 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4908 else if (!orig_arch_specified)
4909 opts->x_ix86_arch_string = NULL;
4911 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4912 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4913 else if (orig_tune_defaulted)
4914 opts->x_ix86_tune_string = NULL;
4916 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4917 if (enum_opts_set.x_ix86_fpmath)
4918 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4919 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4920 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4922 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4923 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4926 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4927 ix86_option_override_internal (false, opts, opts_set);
4929 /* Add any builtin functions with the new isa if any. */
4930 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4932 /* Save the current options unless we are validating options for
4934 t = build_target_option_node (opts);
4936 opts->x_ix86_arch_string = orig_arch_string;
4937 opts->x_ix86_tune_string = orig_tune_string;
4938 opts_set->x_ix86_fpmath = orig_fpmath_set;
4940 /* Free up memory allocated to hold the strings */
4941 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4942 free (option_strings[i]);
4948 /* Hook to validate attribute((target("string"))). */
/* TARGET_OPTION_VALID_ATTRIBUTE_P implementation.  Builds a scratch
   gcc_options initialized from FNDECL's optimization options and the
   default target options, parses ARGS into it, and on success attaches
   the resulting target (and possibly optimization) nodes to FNDECL.  */
4951 ix86_valid_target_attribute_p (tree fndecl,
4952 tree ARG_UNUSED (name),
4954 int ARG_UNUSED (flags))
4956 struct gcc_options func_options;
4957 tree new_target, new_optimize;
4960 /* attribute((target("default"))) does nothing, beyond
4961 affecting multi-versioning. */
4962 if (TREE_VALUE (args)
4963 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4964 && TREE_CHAIN (args) == NULL_TREE
4965 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4968 tree old_optimize = build_optimization_node (&global_options);
4970 /* Get the optimization options of the current function. */
4971 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4974 func_optimize = old_optimize;
4976 /* Init func_options. */
4977 memset (&func_options, 0, sizeof (func_options));
4978 init_options_struct (&func_options, NULL);
4979 lang_hooks.init_options_struct (&func_options);
4981 cl_optimization_restore (&func_options,
4982 TREE_OPTIMIZATION (func_optimize));
4984 /* Initialize func_options to the default before its target options can
4986 cl_target_option_restore (&func_options,
4987 TREE_TARGET_OPTION (target_option_default_node));
4989 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4990 &global_options_set);
4992 new_optimize = build_optimization_node (&func_options);
4994 if (new_target == error_mark_node)
4997 else if (fndecl && new_target)
4999 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5001 if (old_optimize != new_optimize)
5002 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5009 /* Hook to determine if one function can safely inline another. */
/* TARGET_CAN_INLINE_P: CALLEE may be inlined into CALLER only when the
   callee's ISA flags are a subset of the caller's and the remaining
   target options (flags, arch, tune, fpmath, branch_cost) match.  */
5012 ix86_can_inline_p (tree caller, tree callee)
5015 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5016 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5018 /* If callee has no option attributes, then it is ok to inline. */
5022 /* If caller has no option attributes, but callee does then it is not ok to
5024 else if (!caller_tree)
5029 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5030 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5032 /* Callee's isa options should a subset of the caller's, i.e. a SSE4 function
5033 can inline a SSE2 function but a SSE2 function can't inline a SSE4
5035 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5036 != callee_opts->x_ix86_isa_flags)
5039 /* See if we have the same non-isa options. */
5040 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5043 /* See if arch, tune, etc. are the same. */
5044 else if (caller_opts->arch != callee_opts->arch)
5047 else if (caller_opts->tune != callee_opts->tune)
5050 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5053 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5064 /* Remember the last target of ix86_set_current_function. */
5065 static GTY(()) tree ix86_previous_fndecl;
5067 /* Set target globals to default. */
/* Restore global_options and the target-globals structures to whatever
   target_option_current_node specifies, creating (and caching) saved
   target globals for that node on first use.  */
5070 ix86_reset_to_default_globals (void)
5072 tree old_tree = (ix86_previous_fndecl
5073 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
5078 tree new_tree = target_option_current_node;
5079 cl_target_option_restore (&global_options,
5080 TREE_TARGET_OPTION (new_tree));
5081 if (TREE_TARGET_GLOBALS (new_tree))
5082 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5083 else if (new_tree == target_option_default_node)
5084 restore_target_globals (&default_target_globals);
/* Otherwise lazily build and cache the saved globals on the node.  */
5086 TREE_TARGET_GLOBALS (new_tree)
5087 = save_target_globals_default_opts ();
5091 /* Invalidate ix86_previous_fndecl cache. */
/* Called when cached per-function target state may be stale: drop back
   to the default globals and forget the last function seen.  */
5093 ix86_reset_previous_fndecl (void)
5095 ix86_reset_to_default_globals ();
5096 ix86_previous_fndecl = NULL_TREE;
5099 /* Establish appropriate back-end context for processing the function
5100 FNDECL. The argument might be NULL to indicate processing at top
5101 level, outside of any function scope. */
5103 ix86_set_current_function (tree fndecl)
5105 /* Only change the context if the function changes. This hook is called
5106 several times in the course of compiling a function, and we don't want to
5107 slow things down too much or call target_reinit when it isn't safe. */
5108 if (fndecl && fndecl != ix86_previous_fndecl)
5110 tree old_tree = (ix86_previous_fndecl
5111 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
5114 tree new_tree = (fndecl
5115 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
/* Same target options as before: nothing to switch.  */
5118 if (old_tree == new_tree)
/* Switch to FNDECL's non-default target options, caching the saved
   target globals on the option node the first time it is seen.  */
5121 else if (new_tree && new_tree != target_option_default_node)
5123 cl_target_option_restore (&global_options,
5124 TREE_TARGET_OPTION (new_tree));
5125 if (TREE_TARGET_GLOBALS (new_tree))
5126 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5128 TREE_TARGET_GLOBALS (new_tree)
5129 = save_target_globals_default_opts ();
/* Previous function had special options but this one does not.  */
5132 else if (old_tree && old_tree != target_option_default_node)
5133 ix86_reset_to_default_globals ();
5134 ix86_previous_fndecl = fndecl;
5139 /* Return true if this goes in large data/bss. */
/* EXP is a decl or constant.  Only meaningful for the medium code
   models, where objects above ix86_section_threshold bytes (or with
   unknown/variable size) are placed in the .ldata/.lbss sections.  */
5142 ix86_in_large_data_p (tree exp)
5144 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5147 /* Functions are never large data. */
5148 if (TREE_CODE (exp) == FUNCTION_DECL)
5151 /* Automatic variables are never large data. */
5152 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
/* An explicit .ldata/.lbss section placement forces large-data
   treatment regardless of size.  */
5155 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5157 const char *section = DECL_SECTION_NAME (exp);
5158 if (strcmp (section, ".ldata") == 0
5159 || strcmp (section, ".lbss") == 0)
5165 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5167 /* If this is an incomplete type with size 0, then we can't put it
5168 in data because it might be too big when completed. Also,
5169 int_size_in_bytes returns -1 if size can vary or is larger than
5170 an integer in which case also it is safer to assume that it goes in
5172 if (size <= 0 || size > ix86_section_threshold)
5179 /* Switch to the appropriate section for output of DECL.
5180 DECL is either a `VAR_DECL' node or a constant of some sort.
5181 RELOC indicates whether forming the initial value of DECL requires
5182 link-time relocations. */
/* TARGET_ASM_SELECT_SECTION for x86-64 ELF: large-model data decls are
   redirected to the .ldata* family of sections; everything else falls
   through to the generic ELF section selection.  */
5184 ATTRIBUTE_UNUSED static section *
5185 x86_64_elf_select_section (tree decl, int reloc,
5186 unsigned HOST_WIDE_INT align)
5188 if (ix86_in_large_data_p (decl))
5190 const char *sname = NULL;
5191 unsigned int flags = SECTION_WRITE;
5192 switch (categorize_decl_for_section (decl, reloc))
5197 case SECCAT_DATA_REL:
5198 sname = ".ldata.rel";
5200 case SECCAT_DATA_REL_LOCAL:
5201 sname = ".ldata.rel.local";
5203 case SECCAT_DATA_REL_RO:
5204 sname = ".ldata.rel.ro";
5206 case SECCAT_DATA_REL_RO_LOCAL:
5207 sname = ".ldata.rel.ro.local";
5211 flags |= SECTION_BSS;
5214 case SECCAT_RODATA_MERGE_STR:
5215 case SECCAT_RODATA_MERGE_STR_INIT:
5216 case SECCAT_RODATA_MERGE_CONST:
5220 case SECCAT_SRODATA:
5227 /* We don't split these for medium model. Place them into
5228 default sections and hope for best. */
5233 /* We might get called with string constants, but get_named_section
5234 doesn't like them as they are not DECLs. Also, we need to set
5235 flags in that case. */
5237 return get_section (sname, flags, NULL);
5238 return get_named_section (decl, sname, reloc);
5241 return default_elf_select_section (decl, reloc, align);
5244 /* Select a set of attributes for section NAME based on the properties
5245 of DECL and whether or not RELOC indicates that DECL's initializer
5246 might contain runtime relocations. */
/* TARGET_SECTION_TYPE_FLAGS: extend the default flags so the large-model
   .ldata.rel.ro* sections are RELRO and the .lbss family is BSS.  */
5248 static unsigned int ATTRIBUTE_UNUSED
5249 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5251 unsigned int flags = default_section_type_flags (decl, name, reloc);
5253 if (decl == NULL_TREE
5254 && (strcmp (name, ".ldata.rel.ro") == 0
5255 || strcmp (name, ".ldata.rel.ro.local") == 0))
5256 flags |= SECTION_RELRO;
/* NOTE(review): the prefix lengths below are one short of the literal
   lengths (".lbss." is 6 chars, compared with 5; ".gnu.linkonce.lb."
   is 17 chars, compared with 16), so the trailing '.' is never
   actually checked — confirm whether this is intentional.  */
5258 if (strcmp (name, ".lbss") == 0
5259 || strncmp (name, ".lbss.", 5) == 0
5260 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5261 flags |= SECTION_BSS;
5266 /* Build up a unique section name, expressed as a
5267 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5268 RELOC indicates whether the initial value of EXP requires
5269 link-time relocations. */
/* TARGET_ASM_UNIQUE_SECTION: for large-model data, synthesize a
   per-decl section name with an .l* prefix (and .gnu.linkonce when
   COMDAT groups are unavailable); otherwise defer to the default.  */
5271 static void ATTRIBUTE_UNUSED
5272 x86_64_elf_unique_section (tree decl, int reloc)
5274 if (ix86_in_large_data_p (decl))
5276 const char *prefix = NULL;
5277 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5278 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5280 switch (categorize_decl_for_section (decl, reloc))
5283 case SECCAT_DATA_REL:
5284 case SECCAT_DATA_REL_LOCAL:
5285 case SECCAT_DATA_REL_RO:
5286 case SECCAT_DATA_REL_RO_LOCAL:
5287 prefix = one_only ? ".ld" : ".ldata";
5290 prefix = one_only ? ".lb" : ".lbss";
5293 case SECCAT_RODATA_MERGE_STR:
5294 case SECCAT_RODATA_MERGE_STR_INIT:
5295 case SECCAT_RODATA_MERGE_CONST:
5296 prefix = one_only ? ".lr" : ".lrodata";
5298 case SECCAT_SRODATA:
5305 /* We don't split these for medium model. Place them into
5306 default sections and hope for best. */
5311 const char *name, *linkonce;
5314 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5315 name = targetm.strip_name_encoding (name);
5317 /* If we're using one_only, then there needs to be a .gnu.linkonce
5318 prefix to the section name. */
5319 linkonce = one_only ? ".gnu.linkonce" : "";
5321 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5323 set_decl_section_name (decl, string);
5327 default_unique_section (decl, reloc);
#ifdef COMMON_ASM_OP
5331 /* This says how to output assembler code to declare an
5332 uninitialized external linkage data object.
5334 For medium model x86-64 we need to use .largecomm opcode for
/* NAME/SIZE/alignment are emitted with .largecomm for medium-model
   objects above the section threshold, else with the normal common
   directive (COMMON_ASM_OP).  */
5337 x86_elf_aligned_common (FILE *file,
5338 const char *name, unsigned HOST_WIDE_INT size,
5341 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5342 && size > (unsigned int)ix86_section_threshold)
5343 fputs ("\t.largecomm\t", file);
5345 fputs (COMMON_ASM_OP, file);
5346 assemble_name (file, name);
5347 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5348 size, align / BITS_PER_UNIT);
5352 /* Utility function for targets to use in implementing
5353 ASM_OUTPUT_ALIGNED_BSS. */
/* Emit an aligned BSS object: switch to .lbss for large medium-model
   objects, otherwise the regular bss section, then align, label (via
   ASM_DECLARE_OBJECT_NAME when available) and reserve SIZE bytes.  */
5356 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5357 unsigned HOST_WIDE_INT size, int align)
5359 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5360 && size > (unsigned int)ix86_section_threshold)
5361 switch_to_section (get_named_section (decl, ".lbss", 0));
5363 switch_to_section (bss_section);
5364 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5365 #ifdef ASM_DECLARE_OBJECT_NAME
5366 last_assemble_variable_decl = decl;
5367 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5369 /* Standard thing is just output label for the object. */
5370 ASM_OUTPUT_LABEL (file, name);
5371 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Reserve at least one byte so zero-sized objects still get a
   distinct address.  */
5372 ASM_OUTPUT_SKIP (file, size ? size : 1);
5375 /* Decide whether we must probe the stack before any space allocation
5376 on this target. It's essentially TARGET_STACK_PROBE except when
5377 -fstack-check causes the stack to be already probed differently. */
5380 ix86_target_stack_probe (void)
5382 /* Do not probe the stack twice if static stack checking is enabled. */
5383 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5386 return TARGET_STACK_PROBE;
5389 /* Decide whether we can make a sibling call to a function. DECL is the
5390 declaration of the function being targeted by the call and EXP is the
5391 CALL_EXPR representing the call. */
/* TARGET_FUNCTION_OK_FOR_SIBCALL: returns false for the cases where a
   tail call would break PIC/PLT assumptions, stack alignment, the
   return-value ABI, the MS->SYSV clobber set, or register pressure for
   indirect calls; otherwise the sibcall is permitted.  */
5394 ix86_function_ok_for_sibcall (tree decl, tree exp)
5396 tree type, decl_or_type;
5399 /* If we are generating position-independent code, we cannot sibcall
5400 optimize any indirect call, or a direct call to a global function,
5401 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5405 && (!decl || !targetm.binds_local_p (decl)))
5408 /* If we need to align the outgoing stack, then sibcalling would
5409 unalign the stack, which may break the called function. */
5410 if (ix86_minimum_incoming_stack_boundary (true)
5411 < PREFERRED_STACK_BOUNDARY)
5416 decl_or_type = decl;
5417 type = TREE_TYPE (decl);
5421 /* We're looking at the CALL_EXPR, we need the type of the function. */
5422 type = CALL_EXPR_FN (exp); /* pointer expression */
5423 type = TREE_TYPE (type); /* pointer type */
5424 type = TREE_TYPE (type); /* function type */
5425 decl_or_type = type;
5428 /* Check that the return value locations are the same. Like
5429 if we are returning floats on the 80387 register stack, we cannot
5430 make a sibcall from a function that doesn't return a float to a
5431 function that does or, conversely, from a function that does return
5432 a float to a function that doesn't; the necessary stack adjustment
5433 would not be executed. This is also the place we notice
5434 differences in the return value ABI. Note that it is ok for one
5435 of the functions to have void return type as long as the return
5436 value of the other is passed in a register. */
5437 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5438 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5440 if (STACK_REG_P (a) || STACK_REG_P (b))
5442 if (!rtx_equal_p (a, b))
5445 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5447 else if (!rtx_equal_p (a, b))
5452 /* The SYSV ABI has more call-clobbered registers;
5453 disallow sibcalls from MS to SYSV. */
5454 if (cfun->machine->call_abi == MS_ABI
5455 && ix86_function_type_abi (type) == SYSV_ABI)
5460 /* If this call is indirect, we'll need to be able to use a
5461 call-clobbered register for the address of the target function.
5462 Make sure that all such registers are not used for passing
5463 parameters. Note that DLLIMPORT functions are indirect. */
5465 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5467 if (ix86_function_regparm (type, NULL) >= 3)
5469 /* ??? Need to count the actual number of registers to be used,
5470 not the possible number of registers. Fix later. */
5476 /* Otherwise okay. That also includes certain types of indirect calls. */
5480 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5481 and "sseregparm" calling convention attributes;
5482 arguments as in struct attribute_spec.handler. */
/* Validates that the attribute is applied to a function-ish node, that
   the mutually-exclusive calling-convention attributes are not
   combined, and (for regparm) that the argument is an integer within
   REGPARM_MAX.  Sets *no_add_attrs on rejection.  */
5485 ix86_handle_cconv_attribute (tree *node, tree name,
5490 if (TREE_CODE (*node) != FUNCTION_TYPE
5491 && TREE_CODE (*node) != METHOD_TYPE
5492 && TREE_CODE (*node) != FIELD_DECL
5493 && TREE_CODE (*node) != TYPE_DECL)
5495 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5497 *no_add_attrs = true;
5501 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5502 if (is_attribute_p ("regparm", name))
5506 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5508 error ("fastcall and regparm attributes are not compatible");
5511 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5513 error ("regparam and thiscall attributes are not compatible");
5516 cst = TREE_VALUE (args);
5517 if (TREE_CODE (cst) != INTEGER_CST)
5519 warning (OPT_Wattributes,
5520 "%qE attribute requires an integer constant argument",
5522 *no_add_attrs = true;
5524 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5526 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5528 *no_add_attrs = true;
5536 /* Do not warn when emulating the MS ABI. */
5537 if ((TREE_CODE (*node) != FUNCTION_TYPE
5538 && TREE_CODE (*node) != METHOD_TYPE)
5539 || ix86_function_type_abi (*node) != MS_ABI)
5540 warning (OPT_Wattributes, "%qE attribute ignored",
5542 *no_add_attrs = true;
5546 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5547 if (is_attribute_p ("fastcall", name))
5549 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5551 error ("fastcall and cdecl attributes are not compatible");
5553 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5555 error ("fastcall and stdcall attributes are not compatible");
5557 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5559 error ("fastcall and regparm attributes are not compatible");
5561 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5563 error ("fastcall and thiscall attributes are not compatible");
5567 /* Can combine stdcall with fastcall (redundant), regparm and
5569 else if (is_attribute_p ("stdcall", name))
5571 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5573 error ("stdcall and cdecl attributes are not compatible");
5575 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5577 error ("stdcall and fastcall attributes are not compatible");
5579 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5581 error ("stdcall and thiscall attributes are not compatible");
5585 /* Can combine cdecl with regparm and sseregparm. */
5586 else if (is_attribute_p ("cdecl", name))
5588 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5590 error ("stdcall and cdecl attributes are not compatible");
5592 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5594 error ("fastcall and cdecl attributes are not compatible");
5596 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5598 error ("cdecl and thiscall attributes are not compatible");
5601 else if (is_attribute_p ("thiscall", name))
5603 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5604 warning (OPT_Wattributes, "%qE attribute is used for none class-method",
5606 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5608 error ("stdcall and thiscall attributes are not compatible");
5610 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5612 error ("fastcall and thiscall attributes are not compatible");
5614 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5616 error ("cdecl and thiscall attributes are not compatible");
5620 /* Can combine sseregparm with all attributes. */
5625 /* The transactional memory builtins are implicitly regparm or fastcall
5626 depending on the ABI. Override the generic do-nothing attribute that
5627 these builtins were declared with, and replace it with one of the two
5628 attributes that we expect elsewhere. */
/* Handler for the transactional-memory placeholder attribute (see comment
   block above): drops the placeholder and installs either "fastcall" or
   "regparm(2)" on *NODE instead.
   NOTE(review): this numbered listing has gaps (missing original lines,
   e.g. braces and the return); code lines are kept verbatim.  */
5631 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5632 int flags, bool *no_add_attrs)
5636 /* In no case do we want to add the placeholder attribute. */
5637 *no_add_attrs = true;
5639 /* The 64-bit ABI is unchanged for transactional memory. */
5643 /* ??? Is there a better way to validate 32-bit windows? We have
5644 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5645 if (CHECK_STACK_LIMIT > 0)
5646 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
/* Otherwise build a "regparm" attribute with argument value 2.  */
5649 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5650 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
/* Apply the substitute attribute to the declaration.  */
5652 decl_attributes (node, alt, flags);
5657 /* This function determines from TYPE the calling-convention. */
/* Returns a bitmask of IX86_CALLCVT_* flags: one base convention
   (cdecl/stdcall/fastcall/thiscall) possibly OR-ed with the regparm /
   sseregparm modifier flags.
   NOTE(review): listing has gaps (missing lines); code kept verbatim.  */
5660 ix86_get_callcvt (const_tree type)
5662 unsigned int ret = 0;
5667 return IX86_CALLCVT_CDECL;
5669 attrs = TYPE_ATTRIBUTES (type);
5670 if (attrs != NULL_TREE)
/* The base conventions are mutually exclusive; first match wins.  */
5672 if (lookup_attribute ("cdecl", attrs))
5673 ret |= IX86_CALLCVT_CDECL;
5674 else if (lookup_attribute ("stdcall", attrs))
5675 ret |= IX86_CALLCVT_STDCALL;
5676 else if (lookup_attribute ("fastcall", attrs))
5677 ret |= IX86_CALLCVT_FASTCALL;
5678 else if (lookup_attribute ("thiscall", attrs))
5679 ret |= IX86_CALLCVT_THISCALL;
5681 /* Regparam isn't allowed for thiscall and fastcall. */
5682 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5684 if (lookup_attribute ("regparm", attrs))
5685 ret |= IX86_CALLCVT_REGPARM;
5686 if (lookup_attribute ("sseregparm", attrs))
5687 ret |= IX86_CALLCVT_SSEREGPARM;
/* If an explicit base convention was found above, it is final.  */
5690 if (IX86_BASE_CALLCVT(ret) != 0)
5694 is_stdarg = stdarg_p (type);
/* -mrtd makes stdcall the default for non-variadic functions.  */
5695 if (TARGET_RTD && !is_stdarg)
5696 return IX86_CALLCVT_STDCALL | ret;
5700 || TREE_CODE (type) != METHOD_TYPE
5701 || ix86_function_type_abi (type) != MS_ABI)
5702 return IX86_CALLCVT_CDECL | ret;
/* MS-ABI methods default to thiscall.  */
5704 return IX86_CALLCVT_THISCALL;
5707 /* Return 0 if the attributes for two types are incompatible, 1 if they
5708 are compatible, and 2 if they are nearly compatible (which causes a
5709 warning to be generated). */
/* NOTE(review): listing has gaps; the early-return for non-function types
   and the final return are among the missing lines.  */
5712 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5714 unsigned int ccvt1, ccvt2;
/* Only function/method types carry calling-convention attributes.  */
5716 if (TREE_CODE (type1) != FUNCTION_TYPE
5717 && TREE_CODE (type1) != METHOD_TYPE)
5720 ccvt1 = ix86_get_callcvt (type1);
5721 ccvt2 = ix86_get_callcvt (type2);
/* Differing regparm counts make the types incompatible.  */
5724 if (ix86_function_regparm (type1, NULL)
5725 != ix86_function_regparm (type2, NULL))
5731 /* Return the regparm value for a function with the indicated TYPE and DECL.
5732 DECL may be NULL when calling function indirectly
5733 or considering a libcall. */
/* NOTE(review): listing has gaps (missing lines); code kept verbatim.  */
5736 ix86_function_regparm (const_tree type, const_tree decl)
/* 64-bit: the ABI fixes the register-parameter count.  */
5743 return (ix86_function_type_abi (type) == SYSV_ABI
5744 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5745 ccvt = ix86_get_callcvt (type);
5746 regparm = ix86_regparm;
/* An explicit regparm(N) attribute overrides the command-line default.  */
5748 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5750 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5753 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5757 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5759 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5762 /* Use register calling convention for local functions when possible. */
5764 && TREE_CODE (decl) == FUNCTION_DECL
5765 /* Caller and callee must agree on the calling convention, so
5766 checking here just optimize means that with
5767 __attribute__((optimize (...))) caller could use regparm convention
5768 and callee not, or vice versa. Instead look at whether the callee
5769 is optimized or not. */
5770 && opt_for_fn (decl, optimize)
5771 && !(profile_flag && !flag_fentry))
5773 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5774 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl));
5775 if (i && i->local && i->can_change_signature)
5777 int local_regparm, globals = 0, regno;
5779 /* Make sure no regparm register is taken by a
5780 fixed register variable. */
5781 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5782 if (fixed_regs[local_regparm])
5785 /* We don't want to use regparm(3) for nested functions as
5786 these use a static chain pointer in the third argument. */
5787 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5790 /* In 32-bit mode save a register for the split stack. */
5791 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5794 /* Each fixed register usage increases register pressure,
5795 so less registers should be used for argument passing.
5796 This functionality can be overriden by an explicit
5798 for (regno = AX_REG; regno <= DI_REG; regno++)
5799 if (fixed_regs[regno])
/* Reduce the local regparm count by the number of fixed globals.  */
5803 = globals < local_regparm ? local_regparm - globals : 0;
5805 if (local_regparm > regparm)
5806 regparm = local_regparm;
5813 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5814 DFmode (2) arguments in SSE registers for a function with the
5815 indicated TYPE and DECL. DECL may be NULL when calling function
5816 indirectly or considering a libcall. Otherwise return 0. */
/* NOTE(review): listing has gaps; the WARN-guarded diagnostic branch and
   several returns are among the missing lines.  Code kept verbatim.  */
5819 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
/* Only meaningful for the 32-bit ABI.  */
5821 gcc_assert (!TARGET_64BIT);
5823 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5824 by the sseregparm attribute. */
5825 if (TARGET_SSEREGPARM
5826 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5833 error ("calling %qD with attribute sseregparm without "
5834 "SSE/SSE2 enabled", decl);
5836 error ("calling %qT with attribute sseregparm without "
5837 "SSE/SSE2 enabled", type);
5845 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5846 (and DFmode for SSE2) arguments in SSE registers. */
5847 if (decl && TARGET_SSE_MATH && optimize
5848 && !(profile_flag && !flag_fentry))
5850 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5851 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5852 if (i && i->local && i->can_change_signature)
5853 return TARGET_SSE2 ? 2 : 1;
5859 /* Return true if EAX is live at the start of the function. Used by
5860 ix86_expand_prologue to determine if we need special help before
5861 calling allocate_stack_worker. */
5864 ix86_eax_live_at_start_p (void)
5866 /* Cheat. Don't bother working forward from ix86_function_regparm
5867 to the function type to whether an actual argument is located in
5868 eax. Instead just look at cfg info, which is still close enough
5869 to correct at this point. This gives false positives for broken
5870 functions that might use uninitialized data that happens to be
5871 allocated in eax, but who cares? */
/* Register number 0 is eax (per the header comment above).  */
5872 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
/* Decide whether the hidden aggregate-return pointer is popped by the
   callee (false) or kept/popped by the caller (true).  An explicit
   callee_pop_aggregate_return(N) attribute takes precedence.
   NOTE(review): listing has gaps; code kept verbatim.  */
5876 ix86_keep_aggregate_return_pointer (tree fntype)
5882 attr = lookup_attribute ("callee_pop_aggregate_return",
5883 TYPE_ATTRIBUTES (fntype));
/* Attribute argument 0 means "keep" (callee does not pop).  */
5885 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5887 /* For 32-bit MS-ABI the default is to keep aggregate
5889 if (ix86_function_type_abi (fntype) == MS_ABI)
5892 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5895 /* Value is the number of bytes of arguments automatically
5896 popped when returning from a subroutine call.
5897 FUNDECL is the declaration node of the function (as a tree),
5898 FUNTYPE is the data type of the function (as a tree),
5899 or for a library call it is an identifier node for the subroutine name.
5900 SIZE is the number of bytes of arguments passed on the stack.
5902 On the 80386, the RTD insn may be used to pop them if the number
5903 of args is fixed, but if the number is variable then the caller
5904 must pop them all. RTD can't be used for library calls now
5905 because the library is compiled with the Unix compiler.
5906 Use of RTD is a selectable option, since it is incompatible with
5907 standard Unix calling sequences. If the option is not selected,
5908 the caller must always pop the args.
5910 The attribute stdcall is equivalent to RTD on a per module basis. */
/* NOTE(review): listing has gaps; several returns are missing lines.  */
5913 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5917 /* None of the 64-bit ABIs pop arguments. */
/* stdcall/fastcall/thiscall callees pop their fixed arguments.  */
5921 ccvt = ix86_get_callcvt (funtype);
5923 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5924 | IX86_CALLCVT_THISCALL)) != 0
5925 && ! stdarg_p (funtype))
5928 /* Lose any fake structure return argument if it is passed on the stack. */
5929 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5930 && !ix86_keep_aggregate_return_pointer (funtype))
5932 int nregs = ix86_function_regparm (funtype, fundecl);
5934 return GET_MODE_SIZE (Pmode);
5940 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
/* NOTE(review): listing has gaps (missing lines); code kept verbatim.  */
5943 ix86_legitimate_combined_insn (rtx_insn *insn)
5945 /* Check operand constraints in case hard registers were propagated
5946 into insn pattern. This check prevents combine pass from
5947 generating insn patterns with invalid hard register operands.
5948 These invalid insns can eventually confuse reload to error out
5949 with a spill failure. See also PRs 46829 and 46843. */
/* Intentional assignment inside the condition: cache the recog result
   in INSN_CODE and test it in one step.  */
5950 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5954 extract_insn (insn);
5955 preprocess_constraints (insn);
5957 int n_operands = recog_data.n_operands;
5958 int n_alternatives = recog_data.n_alternatives;
5959 for (i = 0; i < n_operands; i++)
5961 rtx op = recog_data.operand[i];
5962 machine_mode mode = GET_MODE (op);
5963 const operand_alternative *op_alt;
5968 /* For pre-AVX disallow unaligned loads/stores where the
5969 instructions don't support it. */
5971 && VECTOR_MODE_P (GET_MODE (op))
5972 && misaligned_operand (op, GET_MODE (op)))
5974 int min_align = get_attr_ssememalign (insn);
5979 /* A unary operator may be accepted by the predicate, but it
5980 is irrelevant for matching constraints. */
/* Strip a hard-register SUBREG so the class check sees the real reg.  */
5984 if (GET_CODE (op) == SUBREG)
5986 if (REG_P (SUBREG_REG (op))
5987 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5988 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5989 GET_MODE (SUBREG_REG (op)),
5992 op = SUBREG_REG (op);
/* Only hard-register operands need constraint validation here.  */
5995 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5998 op_alt = recog_op_alt;
6000 /* Operand has no constraints, anything is OK. */
6001 win = !n_alternatives;
/* Accept the operand if any preferred alternative admits it.  */
6003 alternative_mask preferred = get_preferred_alternatives (insn);
6004 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6006 if (!TEST_BIT (preferred, j))
6008 if (op_alt[i].anything_ok
6009 || (op_alt[i].matches != -1
6011 (recog_data.operand[i],
6012 recog_data.operand[op_alt[i].matches]))
6013 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6028 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
/* Shadow-memory base offset used by AddressSanitizer; differs for
   LP64 Mach-O, LP64 ELF, and 32-bit targets.  */
6030 static unsigned HOST_WIDE_INT
6031 ix86_asan_shadow_offset (void)
6033 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6034 : HOST_WIDE_INT_C (0x7fff8000))
6035 : (HOST_WIDE_INT_1 << 29);
6038 /* Argument support functions. */
6040 /* Return true when register may be used to pass function parameters. */
/* NOTE(review): listing has gaps; the TARGET_64BIT branching structure is
   partly missing.  Code kept verbatim.  */
6042 ix86_function_arg_regno_p (int regno)
6045 const int *parm_regs;
6050 return (regno < REGPARM_MAX
6051 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6053 return (regno < REGPARM_MAX
6054 || (TARGET_MMX && MMX_REGNO_P (regno)
6055 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6056 || (TARGET_SSE && SSE_REGNO_P (regno)
6057 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6060 if (TARGET_SSE && SSE_REGNO_P (regno)
6061 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6064 /* TODO: The function should depend on current function ABI but
6065 builtins.c would need updating then. Therefore we use the
6068 /* RAX is used as hidden argument to va_arg functions. */
6069 if (ix86_abi == SYSV_ABI && regno == AX_REG)
/* Check the integer parameter registers of the selected 64-bit ABI.  */
6072 if (ix86_abi == MS_ABI)
6073 parm_regs = x86_64_ms_abi_int_parameter_registers;
6075 parm_regs = x86_64_int_parameter_registers;
6076 for (i = 0; i < (ix86_abi == MS_ABI
6077 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6078 if (regno == parm_regs[i])
6083 /* Return if we do not know how to pass TYPE solely in registers. */
6086 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
/* Defer to the generic rule first (variable size / padding).  */
6088 if (must_pass_in_stack_var_size_or_pad (mode, type))
6091 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6092 The layout_type routine is crafty and tries to trick us into passing
6093 currently unsupported vector types on the stack by using TImode. */
6094 return (!TARGET_64BIT && mode == TImode
6095 && type && TREE_CODE (type) != VECTOR_TYPE);
6098 /* It returns the size, in bytes, of the area reserved for arguments passed
6099 in registers for the function represented by fndecl dependent to the used
/* NOTE(review): the FNDECL-is-a-type fallback branch and return values are
   among the missing listing lines; code kept verbatim.  */
6102 ix86_reg_parm_stack_space (const_tree fndecl)
6104 enum calling_abi call_abi = SYSV_ABI;
6105 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6106 call_abi = ix86_function_abi (fndecl);
6108 call_abi = ix86_function_type_abi (fndecl);
/* 64-bit MS ABI reserves home space for register arguments.  */
6109 if (TARGET_64BIT && call_abi == MS_ABI)
6114 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
/* The ms_abi / sysv_abi attributes override the default ix86_abi.
   NOTE(review): listing has gaps; code kept verbatim.  */
6117 ix86_function_type_abi (const_tree fntype)
6119 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6121 enum calling_abi abi = ix86_abi;
6122 if (abi == SYSV_ABI)
6124 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6128 static bool warned = false;
/* ms_abi cannot be honored on x32.  */
6131 error ("X32 does not support ms_abi attribute");
6138 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6145 /* We add this as a workaround in order to use libc_has_function
/* Thin forwarding wrapper around the target hook.  */
6148 ix86_libc_has_function (enum function_class fn_class)
6150 return targetm.libc_has_function (fn_class);
/* Return whether FN carries the ms_hook_prologue attribute; diagnose the
   unsupported nested-function case.  */
6154 ix86_function_ms_hook_prologue (const_tree fn)
6156 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6158 if (decl_function_context (fn) != NULL_TREE)
6159 error_at (DECL_SOURCE_LOCATION (fn),
6160 "ms_hook_prologue is not compatible with nested function");
/* Return the calling ABI of FNDECL by classifying its function type.  */
6167 static enum calling_abi
6168 ix86_function_abi (const_tree fndecl)
6172 return ix86_function_type_abi (TREE_TYPE (fndecl));
6175 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
/* Reads the ABI cached on the current function's machine record.  */
6178 ix86_cfun_abi (void)
6182 return cfun->machine->call_abi;
6185 /* Write the extra assembler code needed to declare a function properly. */
/* NOTE(review): listing has gaps; some guards (e.g. around the filler
   emission) are missing lines.  Code kept verbatim.  */
6188 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6191 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
/* Emit 0xCC (int3) filler bytes before the label for hot-patching.  */
6195 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6196 unsigned int filler_cc = 0xcccccccc;
6198 for (i = 0; i < filler_count; i += 4)
6199 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6202 #ifdef SUBTARGET_ASM_UNWIND_INIT
6203 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6206 ASM_OUTPUT_LABEL (asm_out_file, fname);
6208 /* Output magic byte marker, if hot-patch attribute is set. */
6213 /* leaq [%rsp + 0], %rsp */
6214 asm_fprintf (asm_out_file, ASM_BYTE
6215 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6219 /* movl.s %edi, %edi
6221 movl.s %esp, %ebp */
6222 asm_fprintf (asm_out_file, ASM_BYTE
6223 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6229 extern void init_regs (void);
6231 /* Implementation of call abi switching target hook. Specific to FNDECL
6232 the specific call register sets are set. See also
6233 ix86_conditional_register_usage for more details. */
/* Record the ABI of FNDECL (or the global default when NULL) on cfun.  */
6235 ix86_call_abi_override (const_tree fndecl)
6237 if (fndecl == NULL_TREE)
6238 cfun->machine->call_abi = ix86_abi;
6240 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6243 /* 64-bit MS and SYSV ABI have different set of call used registers. Avoid
6244 expensive re-initialization of init_regs each time we switch function context
6245 since this is needed only during RTL expansion. */
/* Uses the call-used status of %esi as a cheap proxy for which ABI the
   register tables were last initialized for.  NOTE(review): the body that
   calls init_regs is among the missing listing lines.  */
6247 ix86_maybe_switch_abi (void)
6250 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6254 /* Return 1 if pseudo register should be created and used to hold
6255 GOT address for PIC code. */
/* NOTE(review): the surrounding condition and other code-model cases are
   missing listing lines.  */
6257 ix86_use_pseudo_pic_reg (void)
6260 && (ix86_cmodel == CM_SMALL_PIC
6267 /* Initialize large model PIC register. */
/* Emits the set_rip / set_got_offset / add sequence that materializes the
   GOT base into pic_offset_table_rtx, using TMP_REGNO as scratch.  */
6270 ix86_init_large_pic_reg (unsigned int tmp_regno)
6272 rtx_code_label *label;
/* Large-model PIC is 64-bit only.  */
6275 gcc_assert (Pmode == DImode);
6276 label = gen_label_rtx ();
6278 LABEL_PRESERVE_P (label) = 1;
6279 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
6280 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6281 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6283 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6284 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6285 pic_offset_table_rtx, tmp_reg));
6288 /* Create and initialize PIC register if required. */
/* Builds the PIC-register initialization sequence and inserts it on the
   entry edge of the function.
   NOTE(review): listing has gaps (start/end_sequence etc. missing).  */
6290 ix86_init_pic_reg (void)
6295 if (!ix86_use_pseudo_pic_reg ())
6302 if (ix86_cmodel == CM_LARGE_PIC)
6303 ix86_init_large_pic_reg (R11_REG);
6305 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6309 /* If there is future mcount call in the function it is more profitable
6310 to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM. */
6311 rtx reg = crtl->profile
6312 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6313 : pic_offset_table_rtx;
6314 rtx insn = emit_insn (gen_set_got (reg));
6315 RTX_FRAME_RELATED_P (insn) = 1;
6317 emit_move_insn (pic_offset_table_rtx, reg);
6318 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
/* Place the sequence on the ENTRY->first-block edge and commit it.  */
6324 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6325 insert_insn_on_edge (seq, entry_edge);
6326 commit_one_edge_insertion (entry_edge);
6329 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6330 for a call to a function whose data type is FNTYPE.
6331 For a library call, FNTYPE is 0. */
/* NOTE(review): listing has gaps (e.g. the fndecl/caller parameters and
   several branch guards are missing lines).  Code kept verbatim.  */
6334 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6335 tree fntype, /* tree ptr for function decl */
6336 rtx libname, /* SYMBOL_REF of library name or 0 */
6340 struct cgraph_local_info *i;
6342 memset (cum, 0, sizeof (*cum));
6346 i = cgraph_node::local_info (fndecl);
6347 cum->call_abi = ix86_function_abi (fndecl);
6352 cum->call_abi = ix86_function_type_abi (fntype);
6355 cum->caller = caller;
6357 /* Set up the number of registers to use for passing arguments. */
6358 cum->nregs = ix86_regparm;
6361 cum->nregs = (cum->call_abi == SYSV_ABI
6362 ? X86_64_REGPARM_MAX
6363 : X86_64_MS_REGPARM_MAX);
6367 cum->sse_nregs = SSE_REGPARM_MAX;
6370 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6371 ? X86_64_SSE_REGPARM_MAX
6372 : X86_64_MS_SSE_REGPARM_MAX);
6376 cum->mmx_nregs = MMX_REGPARM_MAX;
/* Default to warning about all vector-ABI mismatches.  */
6377 cum->warn_avx512f = true;
6378 cum->warn_avx = true;
6379 cum->warn_sse = true;
6380 cum->warn_mmx = true;
6382 /* Because type might mismatch in between caller and callee, we need to
6383 use actual type of function for local calls.
6384 FIXME: cgraph_analyze can be told to actually record if function uses
6385 va_start so for local functions maybe_vaarg can be made aggressive
6387 FIXME: once typesytem is fixed, we won't need this code anymore. */
6388 if (i && i->local && i->can_change_signature)
6389 fntype = TREE_TYPE (fndecl);
6390 cum->stdarg = stdarg_p (fntype);
6391 cum->maybe_vaarg = (fntype
6392 ? (!prototype_p (fntype) || stdarg_p (fntype))
/* MPX bound-register bookkeeping starts from the first bound register.  */
6395 cum->bnd_regno = FIRST_BND_REG;
6396 cum->bnds_in_bt = 0;
6397 cum->force_bnd_pass = 0;
6401 /* If there are variable arguments, then we won't pass anything
6402 in registers in 32-bit mode. */
6403 if (stdarg_p (fntype))
6408 cum->warn_avx512f = false;
6409 cum->warn_avx = false;
6410 cum->warn_sse = false;
6411 cum->warn_mmx = false;
6415 /* Use ecx and edx registers if function has fastcall attribute,
6416 else look for regparm information. */
6419 unsigned int ccvt = ix86_get_callcvt (fntype);
6420 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6423 cum->fastcall = 1; /* Same first register as in fastcall. */
6425 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6431 cum->nregs = ix86_function_regparm (fntype, fndecl);
6434 /* Set up the number of SSE registers used for passing SFmode
6435 and DFmode arguments. Warn for mismatching ABI. */
6436 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6440 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6441 But in the case of vector types, it is some vector mode.
6443 When we have only some of our vector isa extensions enabled, then there
6444 are some modes for which vector_mode_supported_p is false. For these
6445 modes, the generic vector support in gcc will choose some non-vector mode
6446 in order to implement the type. By computing the natural mode, we'll
6447 select the proper ABI location for the operand and not depend on whatever
6448 the middle-end decides to do with these vector types.
6450 The midde-end can't deal with the vector types > 16 bytes. In this
6451 case, we return the original mode and warn ABI change if CUM isn't
6454 If INT_RETURN is true, warn ABI change if the vector mode isn't
6455 available for function return value. */
/* NOTE(review): listing has gaps (missing lines); code kept verbatim.
   Each once-only -Wpsabi warning is latched in a function-local static.  */
6458 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6461 machine_mode mode = TYPE_MODE (type);
6463 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6465 HOST_WIDE_INT size = int_size_in_bytes (type);
6466 if ((size == 8 || size == 16 || size == 32 || size == 64)
6467 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6468 && TYPE_VECTOR_SUBPARTS (type) > 1)
6470 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6472 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6473 mode = MIN_MODE_VECTOR_FLOAT;
6475 mode = MIN_MODE_VECTOR_INT;
6477 /* Get the mode which has this inner mode and number of units. */
6478 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6479 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6480 && GET_MODE_INNER (mode) == innermode)
6482 if (size == 64 && !TARGET_AVX512F)
6484 static bool warnedavx512f;
6485 static bool warnedavx512f_ret;
6487 if (cum && cum->warn_avx512f && !warnedavx512f)
6489 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6490 "without AVX512F enabled changes the ABI"))
6491 warnedavx512f = true;
6493 else if (in_return && !warnedavx512f_ret)
6495 if (warning (OPT_Wpsabi, "AVX512F vector return "
6496 "without AVX512F enabled changes the ABI"))
6497 warnedavx512f_ret = true;
6500 return TYPE_MODE (type);
6502 else if (size == 32 && !TARGET_AVX)
6504 static bool warnedavx;
6505 static bool warnedavx_ret;
6507 if (cum && cum->warn_avx && !warnedavx)
6509 if (warning (OPT_Wpsabi, "AVX vector argument "
6510 "without AVX enabled changes the ABI"))
6513 else if (in_return && !warnedavx_ret)
6515 if (warning (OPT_Wpsabi, "AVX vector return "
6516 "without AVX enabled changes the ABI"))
6517 warnedavx_ret = true;
6520 return TYPE_MODE (type);
6522 else if (((size == 8 && TARGET_64BIT) || size == 16)
6525 static bool warnedsse;
6526 static bool warnedsse_ret;
6528 if (cum && cum->warn_sse && !warnedsse)
6530 if (warning (OPT_Wpsabi, "SSE vector argument "
6531 "without SSE enabled changes the ABI"))
6534 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6536 if (warning (OPT_Wpsabi, "SSE vector return "
6537 "without SSE enabled changes the ABI"))
6538 warnedsse_ret = true;
6541 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6543 static bool warnedmmx;
6544 static bool warnedmmx_ret;
6546 if (cum && cum->warn_mmx && !warnedmmx)
6548 if (warning (OPT_Wpsabi, "MMX vector argument "
6549 "without MMX enabled changes the ABI"))
6552 else if (in_return && !warnedmmx_ret)
6554 if (warning (OPT_Wpsabi, "MMX vector return "
6555 "without MMX enabled changes the ABI"))
6556 warnedmmx_ret = true;
6569 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6570 this may not agree with the mode that the type system has chosen for the
6571 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6572 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6575 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
6580 if (orig_mode != BLKmode)
6581 tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap the register in a one-element PARALLEL at offset 0.  */
6584 tmp = gen_rtx_REG (mode, regno);
6585 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6586 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6592 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6593 of this code is to classify each 8bytes of incoming argument by the register
6594 class and assign registers accordingly. */
6596 /* Return the union class of CLASS1 and CLASS2.
6597 See the x86-64 PS ABI for details. */
/* Implements the psABI post-merger rules; the rule numbers in the comments
   below mirror the ABI document.  */
6599 static enum x86_64_reg_class
6600 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6602 /* Rule #1: If both classes are equal, this is the resulting class. */
6603 if (class1 == class2)
6606 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6608 if (class1 == X86_64_NO_CLASS)
6610 if (class2 == X86_64_NO_CLASS)
6613 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6614 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6615 return X86_64_MEMORY_CLASS;
6617 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6618 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6619 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6620 return X86_64_INTEGERSI_CLASS;
6621 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6622 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6623 return X86_64_INTEGER_CLASS;
6625 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6627 if (class1 == X86_64_X87_CLASS
6628 || class1 == X86_64_X87UP_CLASS
6629 || class1 == X86_64_COMPLEX_X87_CLASS
6630 || class2 == X86_64_X87_CLASS
6631 || class2 == X86_64_X87UP_CLASS
6632 || class2 == X86_64_COMPLEX_X87_CLASS)
6633 return X86_64_MEMORY_CLASS;
6635 /* Rule #6: Otherwise class SSE is used. */
6636 return X86_64_SSE_CLASS;
6639 /* Classify the argument of type TYPE and mode MODE.
6640 CLASSES will be filled by the register class used to pass each word
6641 of the operand. The number of words is returned. In case the parameter
6642 should be passed in memory, 0 is returned. As a special case for zero
6643 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6645 BIT_OFFSET is used internally for handling records and specifies offset
6646 of the offset in bits modulo 512 to avoid overflow cases.
6648 See the x86-64 PS ABI for details.
6652 classify_argument (machine_mode mode, const_tree type,
6653 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6655 HOST_WIDE_INT bytes =
6656 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6658 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6660 /* Variable sized entities are always passed/returned in memory. */
6664 if (mode != VOIDmode
6665 && targetm.calls.must_pass_in_stack (mode, type))
6668 if (type && AGGREGATE_TYPE_P (type))
6672 enum x86_64_reg_class subclasses[MAX_CLASSES];
6674 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6678 for (i = 0; i < words; i++)
6679 classes[i] = X86_64_NO_CLASS;
6681 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6682 signalize memory class, so handle it as special case. */
6685 classes[0] = X86_64_NO_CLASS;
6689 /* Classify each field of record and merge classes. */
6690 switch (TREE_CODE (type))
6693 /* And now merge the fields of structure. */
6694 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6696 if (TREE_CODE (field) == FIELD_DECL)
6700 if (TREE_TYPE (field) == error_mark_node)
6703 /* Bitfields are always classified as integer. Handle them
6704 early, since later code would consider them to be
6705 misaligned integers. */
6706 if (DECL_BIT_FIELD (field))
6708 for (i = (int_bit_position (field)
6709 + (bit_offset % 64)) / 8 / 8;
6710 i < ((int_bit_position (field) + (bit_offset % 64))
6711 + tree_to_shwi (DECL_SIZE (field))
6714 merge_classes (X86_64_INTEGER_CLASS,
6721 type = TREE_TYPE (field);
6723 /* Flexible array member is ignored. */
6724 if (TYPE_MODE (type) == BLKmode
6725 && TREE_CODE (type) == ARRAY_TYPE
6726 && TYPE_SIZE (type) == NULL_TREE
6727 && TYPE_DOMAIN (type) != NULL_TREE
6728 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6733 if (!warned && warn_psabi)
6736 inform (input_location,
6737 "the ABI of passing struct with"
6738 " a flexible array member has"
6739 " changed in GCC 4.4");
6743 num = classify_argument (TYPE_MODE (type), type,
6745 (int_bit_position (field)
6746 + bit_offset) % 512);
6749 pos = (int_bit_position (field)
6750 + (bit_offset % 64)) / 8 / 8;
6751 for (i = 0; i < num && (i + pos) < words; i++)
6753 merge_classes (subclasses[i], classes[i + pos]);
6760 /* Arrays are handled as small records. */
6763 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6764 TREE_TYPE (type), subclasses, bit_offset);
6768 /* The partial classes are now full classes. */
6769 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6770 subclasses[0] = X86_64_SSE_CLASS;
6771 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6772 && !((bit_offset % 64) == 0 && bytes == 4))
6773 subclasses[0] = X86_64_INTEGER_CLASS;
6775 for (i = 0; i < words; i++)
6776 classes[i] = subclasses[i % num];
6781 case QUAL_UNION_TYPE:
6782 /* Unions are similar to RECORD_TYPE but offset is always 0.
6784 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6786 if (TREE_CODE (field) == FIELD_DECL)
6790 if (TREE_TYPE (field) == error_mark_node)
6793 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6794 TREE_TYPE (field), subclasses,
6798 for (i = 0; i < num && i < words; i++)
6799 classes[i] = merge_classes (subclasses[i], classes[i]);
6810 /* When size > 16 bytes, if the first one isn't
6811 X86_64_SSE_CLASS or any other ones aren't
6812 X86_64_SSEUP_CLASS, everything should be passed in
6814 if (classes[0] != X86_64_SSE_CLASS)
6817 for (i = 1; i < words; i++)
6818 if (classes[i] != X86_64_SSEUP_CLASS)
6822 /* Final merger cleanup. */
6823 for (i = 0; i < words; i++)
6825 /* If one class is MEMORY, everything should be passed in
6827 if (classes[i] == X86_64_MEMORY_CLASS)
6830 /* The X86_64_SSEUP_CLASS should be always preceded by
6831 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6832 if (classes[i] == X86_64_SSEUP_CLASS
6833 && classes[i - 1] != X86_64_SSE_CLASS
6834 && classes[i - 1] != X86_64_SSEUP_CLASS)
6836 /* The first one should never be X86_64_SSEUP_CLASS. */
6837 gcc_assert (i != 0);
6838 classes[i] = X86_64_SSE_CLASS;
6841 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6842 everything should be passed in memory. */
6843 if (classes[i] == X86_64_X87UP_CLASS
6844 && (classes[i - 1] != X86_64_X87_CLASS))
6848 /* The first one should never be X86_64_X87UP_CLASS. */
6849 gcc_assert (i != 0);
6850 if (!warned && warn_psabi)
6853 inform (input_location,
6854 "the ABI of passing union with long double"
6855 " has changed in GCC 4.4");
6863 /* Compute alignment needed. We align all types to natural boundaries with
6864 exception of XFmode that is aligned to 64bits. */
6865 if (mode != VOIDmode && mode != BLKmode)
6867 int mode_alignment = GET_MODE_BITSIZE (mode);
6870 mode_alignment = 128;
6871 else if (mode == XCmode)
6872 mode_alignment = 256;
6873 if (COMPLEX_MODE_P (mode))
6874 mode_alignment /= 2;
6875 /* Misaligned fields are always returned in memory. */
6876 if (bit_offset % mode_alignment)
6880 /* for V1xx modes, just use the base mode */
6881 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6882 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6883 mode = GET_MODE_INNER (mode);
6885 /* Classification of atomic types. */
6890 classes[0] = X86_64_SSE_CLASS;
6893 classes[0] = X86_64_SSE_CLASS;
6894 classes[1] = X86_64_SSEUP_CLASS;
6904 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6906 /* Analyze last 128 bits only. */
6907 size = (size - 1) & 0x7f;
6911 classes[0] = X86_64_INTEGERSI_CLASS;
6916 classes[0] = X86_64_INTEGER_CLASS;
6919 else if (size < 64+32)
6921 classes[0] = X86_64_INTEGER_CLASS;
6922 classes[1] = X86_64_INTEGERSI_CLASS;
6925 else if (size < 64+64)
6927 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6935 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6939 /* OImode shouldn't be used directly. */
6944 if (!(bit_offset % 64))
6945 classes[0] = X86_64_SSESF_CLASS;
6947 classes[0] = X86_64_SSE_CLASS;
6950 classes[0] = X86_64_SSEDF_CLASS;
6953 classes[0] = X86_64_X87_CLASS;
6954 classes[1] = X86_64_X87UP_CLASS;
6957 classes[0] = X86_64_SSE_CLASS;
6958 classes[1] = X86_64_SSEUP_CLASS;
6961 classes[0] = X86_64_SSE_CLASS;
6962 if (!(bit_offset % 64))
6968 if (!warned && warn_psabi)
6971 inform (input_location,
6972 "the ABI of passing structure with complex float"
6973 " member has changed in GCC 4.4");
6975 classes[1] = X86_64_SSESF_CLASS;
6979 classes[0] = X86_64_SSEDF_CLASS;
6980 classes[1] = X86_64_SSEDF_CLASS;
6983 classes[0] = X86_64_COMPLEX_X87_CLASS;
6986 /* This modes is larger than 16 bytes. */
6994 classes[0] = X86_64_SSE_CLASS;
6995 classes[1] = X86_64_SSEUP_CLASS;
6996 classes[2] = X86_64_SSEUP_CLASS;
6997 classes[3] = X86_64_SSEUP_CLASS;
7005 classes[0] = X86_64_SSE_CLASS;
7006 classes[1] = X86_64_SSEUP_CLASS;
7007 classes[2] = X86_64_SSEUP_CLASS;
7008 classes[3] = X86_64_SSEUP_CLASS;
7009 classes[4] = X86_64_SSEUP_CLASS;
7010 classes[5] = X86_64_SSEUP_CLASS;
7011 classes[6] = X86_64_SSEUP_CLASS;
7012 classes[7] = X86_64_SSEUP_CLASS;
7020 classes[0] = X86_64_SSE_CLASS;
7021 classes[1] = X86_64_SSEUP_CLASS;
7029 classes[0] = X86_64_SSE_CLASS;
7035 gcc_assert (VECTOR_MODE_P (mode));
7040 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7042 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7043 classes[0] = X86_64_INTEGERSI_CLASS;
7045 classes[0] = X86_64_INTEGER_CLASS;
7046 classes[1] = X86_64_INTEGER_CLASS;
7047 return 1 + (bytes > 8);
7051 /* Examine the argument and return set number of register required in each
7052 class. Return true iff parameter should be passed in memory. */
7055 examine_argument (machine_mode mode, const_tree type, int in_return,
7056 int *int_nregs, int *sse_nregs)
7058 enum x86_64_reg_class regclass[MAX_CLASSES];
7059 int n = classify_argument (mode, type, regclass, 0);
/* Walk the classes assigned by classify_argument and tally the integer
   and SSE registers needed for the argument.  NOTE(review): the counting
   statements for each case group are elided here -- confirm against the
   full source.  */
7066 for (n--; n >= 0; n--)
7067 switch (regclass[n])
7069 case X86_64_INTEGER_CLASS:
7070 case X86_64_INTEGERSI_CLASS:
7073 case X86_64_SSE_CLASS:
7074 case X86_64_SSESF_CLASS:
7075 case X86_64_SSEDF_CLASS:
7078 case X86_64_NO_CLASS:
7079 case X86_64_SSEUP_CLASS:
7081 case X86_64_X87_CLASS:
7082 case X86_64_X87UP_CLASS:
7083 case X86_64_COMPLEX_X87_CLASS:
/* MEMORY class presumably forces the whole argument to the stack.  */
7087 case X86_64_MEMORY_CLASS:
7094 /* Construct container for the argument used by GCC interface. See
7095 FUNCTION_ARG for the detailed description. */
7098 construct_container (machine_mode mode, machine_mode orig_mode,
7099 const_tree type, int in_return, int nintregs, int nsseregs,
7100 const int *intreg, int sse_regno)
7102 /* The following variables hold the static issued_error state. */
7103 static bool issued_sse_arg_error;
7104 static bool issued_sse_ret_error;
7105 static bool issued_x87_ret_error;
7107 machine_mode tmpmode;
7109 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7110 enum x86_64_reg_class regclass[MAX_CLASSES];
7114 int needed_sseregs, needed_intregs;
7115 rtx exp[MAX_CLASSES];
/* Classify the argument into per-eightbyte register classes.  */
7118 n = classify_argument (mode, type, regclass, 0);
7121 if (examine_argument (mode, type, in_return, &needed_intregs,
/* Not enough free registers of the required kind: pass in memory.  */
7124 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7127 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7128 some less clueful developer tries to use floating-point anyway. */
7129 if (needed_sseregs && !TARGET_SSE)
7133 if (!issued_sse_ret_error)
7135 error ("SSE register return with SSE disabled");
7136 issued_sse_ret_error = true;
7139 else if (!issued_sse_arg_error)
7141 error ("SSE register argument with SSE disabled");
7142 issued_sse_arg_error = true;
7147 /* Likewise, error if the ABI requires us to return values in the
7148 x87 registers and the user specified -mno-80387. */
7149 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7150 for (i = 0; i < n; i++)
7151 if (regclass[i] == X86_64_X87_CLASS
7152 || regclass[i] == X86_64_X87UP_CLASS
7153 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7155 if (!issued_x87_ret_error)
7157 error ("x87 register return with x87 disabled");
7158 issued_x87_ret_error = true;
7163 /* First construct simple cases. Avoid SCmode, since we want to use
7164 single register to pass this type. */
7165 if (n == 1 && mode != SCmode)
7166 switch (regclass[0])
7168 case X86_64_INTEGER_CLASS:
7169 case X86_64_INTEGERSI_CLASS:
7170 return gen_rtx_REG (mode, intreg[0]);
7171 case X86_64_SSE_CLASS:
7172 case X86_64_SSESF_CLASS:
7173 case X86_64_SSEDF_CLASS:
7174 if (mode != BLKmode)
7175 return gen_reg_or_parallel (mode, orig_mode,
7176 SSE_REGNO (sse_regno));
7178 case X86_64_X87_CLASS:
7179 case X86_64_COMPLEX_X87_CLASS:
7180 return gen_rtx_REG (mode, FIRST_STACK_REG);
7181 case X86_64_NO_CLASS:
7182 /* Zero sized array, struct or class. */
/* Multi-eightbyte SSE patterns (16/32/64-byte vectors) are returned as a
   single SSE register; the guarding size tests are elided here.  */
7188 && regclass[0] == X86_64_SSE_CLASS
7189 && regclass[1] == X86_64_SSEUP_CLASS
7191 return gen_reg_or_parallel (mode, orig_mode,
7192 SSE_REGNO (sse_regno));
7194 && regclass[0] == X86_64_SSE_CLASS
7195 && regclass[1] == X86_64_SSEUP_CLASS
7196 && regclass[2] == X86_64_SSEUP_CLASS
7197 && regclass[3] == X86_64_SSEUP_CLASS
7199 return gen_reg_or_parallel (mode, orig_mode,
7200 SSE_REGNO (sse_regno));
7202 && regclass[0] == X86_64_SSE_CLASS
7203 && regclass[1] == X86_64_SSEUP_CLASS
7204 && regclass[2] == X86_64_SSEUP_CLASS
7205 && regclass[3] == X86_64_SSEUP_CLASS
7206 && regclass[4] == X86_64_SSEUP_CLASS
7207 && regclass[5] == X86_64_SSEUP_CLASS
7208 && regclass[6] == X86_64_SSEUP_CLASS
7209 && regclass[7] == X86_64_SSEUP_CLASS
7211 return gen_reg_or_parallel (mode, orig_mode,
7212 SSE_REGNO (sse_regno));
7214 && regclass[0] == X86_64_X87_CLASS
7215 && regclass[1] == X86_64_X87UP_CLASS)
7216 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7219 && regclass[0] == X86_64_INTEGER_CLASS
7220 && regclass[1] == X86_64_INTEGER_CLASS
7221 && (mode == CDImode || mode == TImode)
7222 && intreg[0] + 1 == intreg[1])
7223 return gen_rtx_REG (mode, intreg[0]);
7225 /* Otherwise figure out the entries of the PARALLEL. */
7226 for (i = 0; i < n; i++)
7230 switch (regclass[i])
7232 case X86_64_NO_CLASS:
7234 case X86_64_INTEGER_CLASS:
7235 case X86_64_INTEGERSI_CLASS:
7236 /* Merge TImodes on aligned occasions here too. */
7237 if (i * 8 + 8 > bytes)
7239 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7240 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7244 /* We've requested 24 bytes we
7245 don't have mode for. Use DImode. */
7246 if (tmpmode == BLKmode)
7249 = gen_rtx_EXPR_LIST (VOIDmode,
7250 gen_rtx_REG (tmpmode, *intreg),
7254 case X86_64_SSESF_CLASS:
7256 = gen_rtx_EXPR_LIST (VOIDmode,
7257 gen_rtx_REG (SFmode,
7258 SSE_REGNO (sse_regno)),
7262 case X86_64_SSEDF_CLASS:
7264 = gen_rtx_EXPR_LIST (VOIDmode,
7265 gen_rtx_REG (DFmode,
7266 SSE_REGNO (sse_regno)),
7270 case X86_64_SSE_CLASS:
/* A leading SSE class followed by SSEUP classes spans several
   eightbytes; assert the expected SSEUP layout (elided asserts).  */
7278 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7288 && regclass[1] == X86_64_SSEUP_CLASS
7289 && regclass[2] == X86_64_SSEUP_CLASS
7290 && regclass[3] == X86_64_SSEUP_CLASS);
7296 && regclass[1] == X86_64_SSEUP_CLASS
7297 && regclass[2] == X86_64_SSEUP_CLASS
7298 && regclass[3] == X86_64_SSEUP_CLASS
7299 && regclass[4] == X86_64_SSEUP_CLASS
7300 && regclass[5] == X86_64_SSEUP_CLASS
7301 && regclass[6] == X86_64_SSEUP_CLASS
7302 && regclass[7] == X86_64_SSEUP_CLASS);
7310 = gen_rtx_EXPR_LIST (VOIDmode,
7311 gen_rtx_REG (tmpmode,
7312 SSE_REGNO (sse_regno)),
7321 /* Empty aligned struct, union or class. */
/* Assemble the collected EXPR_LIST entries into one PARALLEL.  */
7325 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7326 for (i = 0; i < nexps; i++)
7327 XVECEXP (ret, 0, i) = exp [i];
7331 /* Update the data in CUM to advance over an argument of mode MODE
7332 and data type TYPE. (TYPE is null for libcalls where that information
7333 may not be available.)
7335 Return a number of integer registers advanced over. */
7338 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7339 const_tree type, HOST_WIDE_INT bytes,
7340 HOST_WIDE_INT words)
/* Integer-register case: consume WORDS registers from the pool.
   NOTE(review): the enclosing mode switch is elided in this excerpt.  */
7358 cum->words += words;
7359 cum->nregs -= words;
7360 cum->regno += words;
7361 if (cum->nregs >= 0)
7363 if (cum->nregs <= 0)
7371 /* OImode shouldn't be used directly. */
7375 if (cum->float_in_sse < 2)
7378 if (cum->float_in_sse < 1)
/* SSE case: vectors and (with float_in_sse) scalar floats consume one
   SSE register, tracked separately from the integer counters.  */
7401 if (!type || !AGGREGATE_TYPE_P (type))
7403 cum->sse_words += words;
7404 cum->sse_nregs -= 1;
7405 cum->sse_regno += 1;
7406 if (cum->sse_nregs <= 0)
/* MMX case: mirrors the SSE bookkeeping with the MMX counters.  */
7420 if (!type || !AGGREGATE_TYPE_P (type))
7422 cum->mmx_words += words;
7423 cum->mmx_nregs -= 1;
7424 cum->mmx_regno += 1;
7425 if (cum->mmx_nregs <= 0)
/* Advance CUM over one SysV x86-64 argument; returns through the
   elided tail (not visible here).  */
7438 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7439 const_tree type, HOST_WIDE_INT words, bool named)
7441 int int_nregs, sse_nregs;
7443 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
7444 if (!named && (VALID_AVX512F_REG_MODE (mode)
7445 || VALID_AVX256_REG_MODE (mode)))
/* If the argument fits in the remaining registers, consume them.  */
7448 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7449 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7451 cum->nregs -= int_nregs;
7452 cum->sse_nregs -= sse_nregs;
7453 cum->regno += int_nregs;
7454 cum->sse_regno += sse_nregs;
/* Otherwise it goes on the stack: round the word counter up to the
   argument's alignment boundary and advance past it.  */
7459 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7460 cum->words = (cum->words + align - 1) & ~(align - 1);
7461 cum->words += words;
/* Advance CUM over one MS x64 ABI argument.  Every slot is one word;
   anything not 1/2/4/8 bytes must already have been turned into an
   indirect pass-by-reference (see ix86_pass_by_reference).  */
7467 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7468 HOST_WIDE_INT words)
7470 /* Otherwise, this should be passed indirect. */
7471 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7473 cum->words += words;
7483 /* Update the data in CUM to advance over an argument of mode MODE and
7484 data type TYPE. (TYPE is null for libcalls where that information
7485 may not be available.) */
7488 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7489 const_tree type, bool named)
7491 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7492 HOST_WIDE_INT bytes, words;
7495 if (mode == BLKmode)
7496 bytes = int_size_in_bytes (type);
7498 bytes = GET_MODE_SIZE (mode);
7499 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7502 mode = type_natural_mode (type, NULL, false);
/* MPX pointer-bounds arguments are tracked separately from ordinary
   arguments and do not advance the normal register counters.  */
7504 if ((type && POINTER_BOUNDS_TYPE_P (type))
7505 || POINTER_BOUNDS_MODE_P (mode))
7507 /* If we pass bounds in BT then just update remained bounds count. */
7508 if (cum->bnds_in_bt)
7514 /* Update remained number of bounds to force. */
7515 if (cum->force_bnd_pass)
7516 cum->force_bnd_pass--;
7523 /* The first arg not going to Bounds Tables resets this counter. */
7524 cum->bnds_in_bt = 0;
7525 /* For unnamed args we always pass bounds to avoid bounds mess when
7526 passed and received types do not match. If bounds do not follow
7527 unnamed arg, still pretend required number of bounds were passed. */
7528 if (cum->force_bnd_pass)
7530 cum->bnd_regno += cum->force_bnd_pass;
7531 cum->force_bnd_pass = 0;
/* Dispatch to the per-ABI advance helper.  */
7534 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7535 nregs = function_arg_advance_ms_64 (cum, bytes, words);
7536 else if (TARGET_64BIT)
7537 nregs = function_arg_advance_64 (cum, mode, type, words, named);
7539 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7541 /* For stdarg we expect bounds to be passed for each value passed
7544 cum->force_bnd_pass = nregs;
7545 /* For pointers passed in memory we expect bounds passed in Bounds
7548 cum->bnds_in_bt = chkp_type_bounds_count (type);
7551 /* Define where to put the arguments to a function.
7552 Value is zero to push the argument on the stack,
7553 or a hard register in which to store the argument.
7555 MODE is the argument's machine mode.
7556 TYPE is the data type of the argument (as a tree).
7557 This is null for libcalls where that information may
7559 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7560 the preceding args and about the function being called.
7561 NAMED is nonzero if this argument is a named parameter
7562 (otherwise it is an extra parameter matching an ellipsis). */
7565 function_arg_32 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7566 machine_mode orig_mode, const_tree type,
7567 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7569 /* Avoid the AL settings for the Unix64 ABI. */
7570 if (mode == VOIDmode)
/* Integer modes: the argument fits in the remaining regparm registers.  */
7586 if (words <= cum->nregs)
7588 int regno = cum->regno;
7590 /* Fastcall allocates the first two DWORD (SImode) or
7591 smaller arguments to ECX and EDX if it isn't an
7597 || (type && AGGREGATE_TYPE_P (type)))
7600 /* ECX not EAX is the first allocated register. */
7601 if (regno == AX_REG)
7604 return gen_rtx_REG (mode, regno)
7609 if (cum->float_in_sse < 2)
7612 if (cum->float_in_sse < 1)
7616 /* In 32bit, we pass TImode in xmm registers. */
7623 if (!type || !AGGREGATE_TYPE_P (type))
7626 return gen_reg_or_parallel (mode, orig_mode,
7627 cum->sse_regno + FIRST_SSE_REG);
7633 /* OImode and XImode shouldn't be used directly. */
/* Larger SSE vector modes also go in XMM registers.  */
7648 if (!type || !AGGREGATE_TYPE_P (type))
7651 return gen_reg_or_parallel (mode, orig_mode,
7652 cum->sse_regno + FIRST_SSE_REG);
/* 8-byte vector modes go in MMX registers.  */
7662 if (!type || !AGGREGATE_TYPE_P (type))
7665 return gen_reg_or_parallel (mode, orig_mode,
7666 cum->mmx_regno + FIRST_MMX_REG);
/* Return the register (or PARALLEL) for one SysV x86-64 argument,
   delegating the per-eightbyte layout to construct_container.  */
7675 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7676 machine_mode orig_mode, const_tree type, bool named)
7678 /* Handle a hidden AL argument containing number of registers
7679 for varargs x86-64 functions. */
7680 if (mode == VOIDmode)
7681 return GEN_INT (cum->maybe_vaarg
7682 ? (cum->sse_nregs < 0
7683 ? X86_64_SSE_REGPARM_MAX
7704 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
7710 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7712 &x86_64_int_parameter_registers [cum->regno],
/* Return the register for one MS x64 ABI argument.  Each of the first
   four slots maps to a fixed integer register or, for scalar floats,
   the SSE register of the same ordinal.  */
7717 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7718 machine_mode orig_mode, bool named,
7719 HOST_WIDE_INT bytes)
7723 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
7724 We use value of -2 to specify that current function call is MSABI. */
7725 if (mode == VOIDmode)
7726 return GEN_INT (-2);
7728 /* If we've run out of registers, it goes on the stack. */
7729 if (cum->nregs == 0)
7732 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7734 /* Only floating point modes are passed in anything but integer regs. */
7735 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7738 regno = cum->regno + FIRST_SSE_REG;
7743 /* Unnamed floating parameters are passed in both the
7744 SSE and integer registers. */
7745 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7746 t2 = gen_rtx_REG (mode, regno);
7747 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7748 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7749 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7752 /* Handle aggregated types passed in register. */
7753 if (orig_mode == BLKmode)
7755 if (bytes > 0 && bytes <= 8)
7756 mode = (bytes > 4 ? DImode : SImode);
7757 if (mode == BLKmode)
7761 return gen_reg_or_parallel (mode, orig_mode, regno);
7764 /* Return where to put the arguments to a function.
7765 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7767 MODE is the argument's machine mode. TYPE is the data type of the
7768 argument. It is null for libcalls where that information may not be
7769 available. CUM gives information about the preceding args and about
7770 the function being called. NAMED is nonzero if this argument is a
7771 named parameter (otherwise it is an extra parameter matching an
7775 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
7776 const_tree type, bool named)
7778 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7779 machine_mode mode = omode;
7780 HOST_WIDE_INT bytes, words;
7783 /* All pointer bounds arguments are handled separately here. */
7784 if ((type && POINTER_BOUNDS_TYPE_P (type))
7785 || POINTER_BOUNDS_MODE_P (mode))
7787 /* Return NULL if bounds are forced to go in Bounds Table. */
7788 if (cum->bnds_in_bt)
7790 /* Return the next available bound reg if any. */
7791 else if (cum->bnd_regno <= LAST_BND_REG)
7792 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
7793 /* Return the next special slot number otherwise. */
7795 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
7800 if (mode == BLKmode)
7801 bytes = int_size_in_bytes (type);
7803 bytes = GET_MODE_SIZE (mode);
7804 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7806 /* To simplify the code below, represent vector types with a vector mode
7807 even if MMX/SSE are not active. */
7808 if (type && TREE_CODE (type) == VECTOR_TYPE)
7809 mode = type_natural_mode (type, cum, false);
/* Dispatch to the per-ABI argument helper.  */
7811 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7812 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7813 else if (TARGET_64BIT)
7814 arg = function_arg_64 (cum, mode, omode, type, named);
7816 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7821 /* A C expression that indicates when an argument must be passed by
7822 reference. If nonzero for an argument, a copy of that argument is
7823 made in memory and a pointer to the argument is passed instead of
7824 the argument itself. The pointer is passed in whatever way is
7825 appropriate for passing a pointer to that type. */
7828 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7829 const_tree type, bool)
7831 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7833 /* See Windows x64 Software Convention. */
7834 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7836 int msize = (int) GET_MODE_SIZE (mode);
7839 /* Arrays are passed by reference. */
7840 if (TREE_CODE (type) == ARRAY_TYPE)
7843 if (AGGREGATE_TYPE_P (type))
7845 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7846 are passed by reference. */
7847 msize = int_size_in_bytes (type);
7851 /* __m128 is passed by reference. */
/* Sizes 1/2/4/8 are passed in registers; everything else by reference.  */
7853 case 1: case 2: case 4: case 8:
/* SysV 64-bit: variable-size types (size == -1) go by reference.  */
7859 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7865 /* Return true when TYPE should be 128bit aligned for 32bit argument
7866 passing ABI. XXX: This function is obsolete and is only used for
7867 checking psABI compatibility with previous versions of GCC. */
7870 ix86_compat_aligned_value_p (const_tree type)
7872 machine_mode mode = TYPE_MODE (type);
7873 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7877 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7879 if (TYPE_ALIGN (type) < 128)
7882 if (AGGREGATE_TYPE_P (type))
7884 /* Walk the aggregates recursively. */
7885 switch (TREE_CODE (type))
7889 case QUAL_UNION_TYPE:
7893 /* Walk all the structure fields. */
7894 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7896 if (TREE_CODE (field) == FIELD_DECL
7897 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7904 /* Just for use if some languages pass arrays by value. */
7905 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7916 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7917 XXX: This function is obsolete and is only used for checking psABI
7918 compatibility with previous versions of GCC. */
7921 ix86_compat_function_arg_boundary (machine_mode mode,
7922 const_tree type, unsigned int align)
7924 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7925 natural boundaries. */
7926 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7928 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7929 make an exception for SSE modes since these require 128bit
7932 The handling here differs from field_alignment. ICC aligns MMX
7933 arguments to 4 byte boundaries, while structure fields are aligned
7934 to 8 byte boundaries. */
7937 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7938 align = PARM_BOUNDARY;
7942 if (!ix86_compat_aligned_value_p (type))
7943 align = PARM_BOUNDARY;
/* Never demand more alignment than the target's maximum.  */
7946 if (align > BIGGEST_ALIGNMENT)
7947 align = BIGGEST_ALIGNMENT;
7951 /* Return true when TYPE should be 128bit aligned for 32bit argument
/* Current (non-compat) recursive check for 128-bit-aligned contents.  */
7955 ix86_contains_aligned_value_p (const_tree type)
7957 machine_mode mode = TYPE_MODE (type);
7959 if (mode == XFmode || mode == XCmode)
7962 if (TYPE_ALIGN (type) < 128)
7965 if (AGGREGATE_TYPE_P (type))
7967 /* Walk the aggregates recursively. */
7968 switch (TREE_CODE (type))
7972 case QUAL_UNION_TYPE:
7976 /* Walk all the structure fields. */
7977 for (field = TYPE_FIELDS (type);
7979 field = DECL_CHAIN (field))
7981 if (TREE_CODE (field) == FIELD_DECL
7982 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7989 /* Just for use if some languages pass arrays by value. */
7990 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
/* Scalar case: aligned iff the type's own alignment is >= 128 bits.  */
7999 return TYPE_ALIGN (type) >= 128;
8004 /* Gives the alignment boundary, in bits, of an argument with the
8005 specified mode and type. */
8008 ix86_function_arg_boundary (machine_mode mode, const_tree type)
8013 /* Since the main variant type is used for call, we convert it to
8014 the main variant type. */
8015 type = TYPE_MAIN_VARIANT (type);
8016 align = TYPE_ALIGN (type);
8019 align = GET_MODE_ALIGNMENT (mode);
8020 if (align < PARM_BOUNDARY)
8021 align = PARM_BOUNDARY;
/* Remember the pre-adjustment alignment so we can warn below if the
   psABI-compat answer would have differed.  */
8025 unsigned int saved_align = align;
8029 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8032 if (mode == XFmode || mode == XCmode)
8033 align = PARM_BOUNDARY;
8035 else if (!ix86_contains_aligned_value_p (type))
8036 align = PARM_BOUNDARY;
8039 align = PARM_BOUNDARY;
/* Emit a one-time note when the result differs from GCC <= 4.5.  */
8044 && align != ix86_compat_function_arg_boundary (mode, type,
8048 inform (input_location,
8049 "The ABI for passing parameters with %d-byte"
8050 " alignment has changed in GCC 4.6",
8051 align / BITS_PER_UNIT);
8058 /* Return true if N is a possible register number of function value. */
8061 ix86_function_value_regno_p (const unsigned int regno)
8068 return (!TARGET_64BIT || ix86_abi != MS_ABI);
8071 return TARGET_64BIT && ix86_abi != MS_ABI;
/* Bound registers are value registers only for MPX-instrumented code.  */
8074 return chkp_function_instrumented_p (current_function_decl);
8076 /* Complex values are returned in %st(0)/%st(1) pair. */
8079 /* TODO: The function should depend on current function ABI but
8080 builtins.c would need updating then. Therefore we use the
8082 if (TARGET_64BIT && ix86_abi == MS_ABI)
8084 return TARGET_FLOAT_RETURNS_IN_80387;
8086 /* Complex values are returned in %xmm0/%xmm1 pair. */
8092 if (TARGET_MACHO || TARGET_64BIT)
8100 /* Define how to find the value returned by a function.
8101 VALTYPE is the data type of the value (as a tree).
8102 If the precise function being called is known, FUNC is its FUNCTION_DECL;
8103 otherwise, FUNC is 0. */
8106 function_value_32 (machine_mode orig_mode, machine_mode mode,
8107 const_tree fntype, const_tree fn)
8111 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8112 we normally prevent this case when mmx is not available. However
8113 some ABIs may require the result to be returned like DImode. */
8114 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8115 regno = FIRST_MMX_REG;
8117 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
8118 we prevent this case when sse is not available. However some ABIs
8119 may require the result to be returned like integer TImode. */
8120 else if (mode == TImode
8121 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8122 regno = FIRST_SSE_REG;
8124 /* 32-byte vector modes in %ymm0. */
8125 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8126 regno = FIRST_SSE_REG;
8128 /* 64-byte vector modes in %zmm0. */
8129 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8130 regno = FIRST_SSE_REG;
8132 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
8133 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8134 regno = FIRST_FLOAT_REG;
8136 /* Most things go in %eax. */
8139 /* Override FP return register with %xmm0 for local functions when
8140 SSE math is enabled or for functions with sseregparm attribute. */
8141 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8143 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8144 if ((sse_level >= 1 && mode == SFmode)
8145 || (sse_level == 2 && mode == DFmode))
8146 regno = FIRST_SSE_REG;
8149 /* OImode shouldn't be used directly. */
8150 gcc_assert (mode != OImode);
8152 return gen_rtx_REG (orig_mode, regno);
/* SysV x86-64 return-value lookup; delegates the general case to
   construct_container over the return-register sets.  */
8156 function_value_64 (machine_mode orig_mode, machine_mode mode,
8161 /* Handle libcalls, which don't provide a type node. */
8162 if (valtype == NULL)
8176 regno = FIRST_SSE_REG;
8180 regno = FIRST_FLOAT_REG;
8188 return gen_rtx_REG (mode, regno);
8190 else if (POINTER_TYPE_P (valtype))
8192 /* Pointers are always returned in word_mode. */
8196 ret = construct_container (mode, orig_mode, valtype, 1,
8197 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8198 x86_64_int_return_registers, 0);
8200 /* For zero sized structures, construct_container returns NULL, but we
8201 need to keep rest of compiler happy by returning meaningful value. */
8203 ret = gen_rtx_REG (orig_mode, AX_REG);
/* MS x64 ABI return-value lookup: %rax by default, %xmm0 for the
   float/vector cases below.  */
8209 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8212 unsigned int regno = AX_REG;
8216 switch (GET_MODE_SIZE (mode))
8219 if (valtype != NULL_TREE
/* NOTE(review): the next condition is duplicated verbatim on two
   consecutive lines; the second test is redundant and may have been
   intended as a different predicate -- confirm against upstream.  */
8220 && !VECTOR_INTEGER_TYPE_P (valtype)
8221 && !VECTOR_INTEGER_TYPE_P (valtype)
8222 && !INTEGRAL_TYPE_P (valtype)
8223 && !VECTOR_FLOAT_TYPE_P (valtype))
8225 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8226 && !COMPLEX_MODE_P (mode))
8227 regno = FIRST_SSE_REG;
8231 if (mode == SFmode || mode == DFmode)
8232 regno = FIRST_SSE_REG;
8238 return gen_rtx_REG (orig_mode, regno);
/* Common worker for ix86_function_value and ix86_libcall_value:
   resolve FNTYPE_OR_DECL, then dispatch on ABI.  */
8242 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8243 machine_mode orig_mode, machine_mode mode)
8245 const_tree fn, fntype;
8248 if (fntype_or_decl && DECL_P (fntype_or_decl))
8249 fn = fntype_or_decl;
8250 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
/* MPX pointer bounds are returned in the first bound register.  */
8252 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8253 || POINTER_BOUNDS_MODE_P (mode))
8254 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8255 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8256 return function_value_ms_64 (orig_mode, mode, valtype);
8257 else if (TARGET_64BIT)
8258 return function_value_64 (orig_mode, mode, valtype);
8260 return function_value_32 (orig_mode, mode, fntype, fn);
/* TARGET_FUNCTION_VALUE hook: compute the natural mode for VALTYPE and
   defer to the common worker.  */
8264 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8266 machine_mode mode, orig_mode;
8268 orig_mode = TYPE_MODE (valtype);
8269 mode = type_natural_mode (valtype, NULL, true);
8270 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8273 /* Return an RTX representing a place where a function returns
8274 or receives pointer bounds or NULL if no bounds are returned.
8276 VALTYPE is a data type of a value returned by the function.
8278 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8279 or FUNCTION_TYPE of the function.
8281 If OUTGOING is false, return a place in which the caller will
8282 see the return value. Otherwise, return a place where a
8283 function returns a value. */
8286 ix86_function_value_bounds (const_tree valtype,
8287 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8288 bool outgoing ATTRIBUTE_UNUSED)
8292 if (BOUNDED_TYPE_P (valtype))
8293 res = gen_rtx_REG (BNDmode, FIRST_BND_REG)
8294 else if (chkp_type_has_pointer (valtype))
8299 unsigned i, bnd_no = 0;
/* Collect the pointer slots of VALTYPE into a bitmap and emit one
   bound register per slot (at most two -- see the assert).  */
8301 bitmap_obstack_initialize (NULL);
8302 slots = BITMAP_ALLOC (NULL);
8303 chkp_find_bound_slots (valtype, slots);
8305 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8307 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8308 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8309 gcc_assert (bnd_no < 2);
8310 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8313 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8315 BITMAP_FREE (slots);
8316 bitmap_obstack_release (NULL);
8324 /* Pointer function arguments and return values are promoted to
8328 ix86_promote_function_mode (const_tree type, machine_mode mode,
8329 int *punsignedp, const_tree fntype,
/* Pointers extend unsigned; everything else uses the default policy.  */
8332 if (type != NULL_TREE && POINTER_TYPE_P (type))
8334 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8337 return default_promote_function_mode (type, mode, punsignedp, fntype,
8341 /* Return true if a structure, union or array with MODE containing FIELD
8342 should be accessed using BLKmode. */
8345 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8347 /* Union with XFmode must be in BLKmode. */
8348 return (mode == XFmode
8349 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8350 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
/* TARGET_LIBCALL_VALUE hook: libcalls have no type node, so pass MODE
   for both the original and the natural mode.  */
8354 ix86_libcall_value (machine_mode mode)
8356 return ix86_function_value_1 (NULL, NULL, mode, mode);
8359 /* Return true iff type is returned in memory. */
8362 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8364 #ifdef SUBTARGET_RETURN_IN_MEMORY
8365 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8367 const machine_mode mode = type_natural_mode (type, NULL, true);
/* Pointer bounds are returned in a bound register, never memory.  */
8370 if (POINTER_BOUNDS_TYPE_P (type))
8375 if (ix86_function_type_abi (fntype) == MS_ABI)
8377 size = int_size_in_bytes (type);
8379 /* __m128 is returned in xmm0. */
8380 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8381 || INTEGRAL_TYPE_P (type)
8382 || VECTOR_FLOAT_TYPE_P (type))
8383 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8384 && !COMPLEX_MODE_P (mode)
8385 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8388 /* Otherwise, the size must be exactly in [1248]. */
8389 return size != 1 && size != 2 && size != 4 && size != 8;
/* SysV 64-bit: in memory iff examine_argument says so.  */
8393 int needed_intregs, needed_sseregs;
8395 return examine_argument (mode, type, 1,
8396 &needed_intregs, &needed_sseregs);
8401 if (mode == BLKmode)
8404 size = int_size_in_bytes (type);
8406 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8409 if (VECTOR_MODE_P (mode) || mode == TImode)
8411 /* User-created vectors small enough to fit in EAX. */
8415 /* Unless ABI prescribes otherwise,
8416 MMX/3dNow values are returned in MM0 if available. */
8419 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8421 /* SSE values are returned in XMM0 if available. */
8425 /* AVX values are returned in YMM0 if available. */
8429 /* AVX512F values are returned in ZMM0 if available. */
8431 return !TARGET_AVX512F;
8440 /* OImode shouldn't be used directly. */
8441 gcc_assert (mode != OImode);
8449 /* Create the va_list data type. */
8451 /* Returns the calling convention specific va_list date type.
8452 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8455 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8457 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8459 /* For i386 we use plain pointer to argument area. */
8460 if (!TARGET_64BIT || abi == MS_ABI)
8461 return build_pointer_type (char_type_node);
/* SysV 64-bit: build the four-field __va_list_tag record
   (gp_offset, fp_offset, overflow_arg_area, reg_save_area).  */
8463 record = lang_hooks.types.make_type (RECORD_TYPE);
8464 type_decl = build_decl (BUILTINS_LOCATION,
8465 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8467 f_gpr = build_decl (BUILTINS_LOCATION,
8468 FIELD_DECL, get_identifier ("gp_offset"),
8469 unsigned_type_node);
8470 f_fpr = build_decl (BUILTINS_LOCATION,
8471 FIELD_DECL, get_identifier ("fp_offset"),
8472 unsigned_type_node);
8473 f_ovf = build_decl (BUILTINS_LOCATION,
8474 FIELD_DECL, get_identifier ("overflow_arg_area"),
8476 f_sav = build_decl (BUILTINS_LOCATION,
8477 FIELD_DECL, get_identifier ("reg_save_area"),
8480 va_list_gpr_counter_field = f_gpr;
8481 va_list_fpr_counter_field = f_fpr;
8483 DECL_FIELD_CONTEXT (f_gpr) = record;
8484 DECL_FIELD_CONTEXT (f_fpr) = record;
8485 DECL_FIELD_CONTEXT (f_ovf) = record;
8486 DECL_FIELD_CONTEXT (f_sav) = record;
8488 TYPE_STUB_DECL (record) = type_decl;
8489 TYPE_NAME (record) = type_decl;
8490 TYPE_FIELDS (record) = f_gpr;
8491 DECL_CHAIN (f_gpr) = f_fpr;
8492 DECL_CHAIN (f_fpr) = f_ovf;
8493 DECL_CHAIN (f_ovf) = f_sav;
8495 layout_type (record);
8497 /* The correct type is an array type of one element. */
8498 return build_array_type (record, build_index_type (size_zero_node));
8501 /* Setup the builtin va_list data type and for 64-bit the additional
8502 calling convention specific va_list data types. */
/* NOTE(review): excerpt — return type, braces, and the TARGET_64BIT guard and
   else-branches are elided; the duplicated assignment pairs below are the two
   arms of if/else blocks whose condition lines are not visible.  */
8505 ix86_build_builtin_va_list (void)
8507 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8509 /* Initialize abi specific va_list builtin types. */
8513 if (ix86_abi == MS_ABI)
8515 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
/* Cache a distinct variant so the SysV va_list can be told apart from the
   default va_list type.  */
8516 if (TREE_CODE (t) != RECORD_TYPE)
8517 t = build_variant_type_copy (t);
8518 sysv_va_list_type_node = t;
8523 if (TREE_CODE (t) != RECORD_TYPE)
8524 t = build_variant_type_copy (t);
8525 sysv_va_list_type_node = t;
8527 if (ix86_abi != MS_ABI)
8529 t = ix86_build_builtin_va_list_abi (MS_ABI);
8530 if (TREE_CODE (t) != RECORD_TYPE)
8531 t = build_variant_type_copy (t);
8532 ms_va_list_type_node = t;
8537 if (TREE_CODE (t) != RECORD_TYPE)
8538 t = build_variant_type_copy (t);
8539 ms_va_list_type_node = t;
8546 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* NOTE(review): excerpt — return type, local declarations (i, max, mem, set,
   save_area, test, smode), braces, and else lines are elided.  Saves the
   unnamed GP and SSE argument registers into the register save area so
   va_arg can fetch them later.  */
8549 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8555 /* GPR size of varargs save area. */
8556 if (cfun->va_list_gpr_size)
8557 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8559 ix86_varargs_gpr_size = 0;
8561 /* FPR size of varargs save area. We don't need it if we don't pass
8562 anything in SSE registers. */
8563 if (TARGET_SSE && cfun->va_list_fpr_size)
8564 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8566 ix86_varargs_fpr_size = 0;
8568 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8571 save_area = frame_pointer_rtx;
8572 set = get_varargs_alias_set ();
/* Only the registers past the last named argument need saving.  */
8574 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8575 if (max > X86_64_REGPARM_MAX)
8576 max = X86_64_REGPARM_MAX;
8578 for (i = cum->regno; i < max; i++)
8580 mem = gen_rtx_MEM (word_mode,
8581 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8582 MEM_NOTRAP_P (mem) = 1;
8583 set_mem_alias_set (mem, set);
8584 emit_move_insn (mem,
8585 gen_rtx_REG (word_mode,
8586 x86_64_int_parameter_registers[i]));
8589 if (ix86_varargs_fpr_size)
8592 rtx_code_label *label;
8595 /* Now emit code to save SSE registers. The AX parameter contains number
8596 of SSE parameter registers used to call this function, though all we
8597 actually check here is the zero/non-zero status. */
8599 label = gen_label_rtx ();
8600 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8601 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8604 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8605 we used movdqa (i.e. TImode) instead? Perhaps even better would
8606 be if we could determine the real mode of the data, via a hook
8607 into pass_stdarg. Ignore all that for now. */
8609 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8610 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8612 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8613 if (max > X86_64_SSE_REGPARM_MAX)
8614 max = X86_64_SSE_REGPARM_MAX;
8616 for (i = cum->sse_regno; i < max; ++i)
/* SSE slots sit directly above the GPR save area, 16 bytes apiece.  */
8618 mem = plus_constant (Pmode, save_area,
8619 i * 16 + ix86_varargs_gpr_size)
8620 mem = gen_rtx_MEM (smode, mem);
8621 MEM_NOTRAP_P (mem) = 1;
8622 set_mem_alias_set (mem, set);
8623 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8625 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
/* NOTE(review): excerpt — return type, braces, locals (i, mem, reg), and the
   tail of the comment at 8638 are elided.  MS-ABI variant: spill the four
   integer argument registers into their caller-allocated home slots.  */
8633 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8635 alias_set_type set = get_varargs_alias_set ();
8638 /* Reset to zero, as there might be a sysv vaarg used
8640 ix86_varargs_gpr_size = 0;
8641 ix86_varargs_fpr_size = 0;
8643 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
/* Home slots live in the caller's frame at incoming_args + i * wordsize.  */
8647 mem = gen_rtx_MEM (Pmode,
8648 plus_constant (Pmode, virtual_incoming_args_rtx,
8649 i * UNITS_PER_WORD));
8650 MEM_NOTRAP_P (mem) = 1;
8651 set_mem_alias_set (mem, set);
8653 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8654 emit_move_insn (mem, reg);
/* NOTE(review): excerpt — return type, braces, the tree fntype declaration,
   the next_cum initialization, and the TARGET_64BIT early-return are elided.
   Dispatcher for TARGET_SETUP_INCOMING_VARARGS: advances past the last named
   argument and delegates to the ABI-specific worker.  */
8659 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8660 tree type, int *, int no_rtl)
8662 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8663 CUMULATIVE_ARGS next_cum;
8666 /* This argument doesn't appear to be used anymore. Which is good,
8667 because the old code here didn't suppress rtl generation. */
8668 gcc_assert (!no_rtl);
8673 fntype = TREE_TYPE (current_function_decl);
8675 /* For varargs, we do not want to skip the dummy va_dcl argument.
8676 For stdargs, we do want to skip the last named argument. */
8678 if (stdarg_p (fntype))
8679 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8682 if (cum->call_abi == MS_ABI)
8683 setup_incoming_varargs_ms_64 (&next_cum);
8685 setup_incoming_varargs_64 (&next_cum);
/* NOTE(review): excerpt — return type, several parameter lines (tree type,
   int no_rtl), braces, locals (fntype, save_area, bounds, ldx_addr, ptr), and
   control-flow lines are elided.  MPX (tree-chkp) worker: stores the bounds
   that arrived in BND registers — or must be loaded from the bounds table —
   alongside the saved vararg GP registers.  */
8689 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8690 enum machine_mode mode,
8692 int *pretend_size ATTRIBUTE_UNUSED,
8695 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8696 CUMULATIVE_ARGS next_cum;
8699 int bnd_reg, i, max;
8701 gcc_assert (!no_rtl);
8703 /* Do nothing if we use plain pointer to argument area. */
8704 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8707 fntype = TREE_TYPE (current_function_decl);
8709 /* For varargs, we do not want to skip the dummy va_dcl argument.
8710 For stdargs, we do want to skip the last named argument. */
8712 if (stdarg_p (fntype))
8713 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8715 save_area = frame_pointer_rtx;
8717 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8718 if (max > X86_64_REGPARM_MAX)
8719 max = X86_64_REGPARM_MAX;
8721 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8722 if (chkp_function_instrumented_p (current_function_decl))
8723 for (i = cum->regno; i < max; i++)
8725 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
8726 rtx reg = gen_rtx_REG (DImode,
8727 x86_64_int_parameter_registers[i]);
/* First LAST_BND_REG bounds come in registers; the rest must be loaded
   from the on-stack bounds table via bndldx.  */
8731 if (bnd_reg <= LAST_BND_REG)
8732 bounds = gen_rtx_REG (BNDmode, bnd_reg);
8736 plus_constant (Pmode, arg_pointer_rtx,
8737 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
8738 bounds = gen_reg_rtx (BNDmode);
8739 emit_insn (BNDmode == BND64mode
8740 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
8741 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
8744 emit_insn (BNDmode == BND64mode
8745 ? gen_bnd64_stx (addr, ptr, bounds)
8746 : gen_bnd32_stx (addr, ptr, bounds));
8753 /* Checks if TYPE is of kind va_list char *. */
/* NOTE(review): excerpt — return type, braces, the canonic declaration, and
   the 32-bit early return are elided.  True when the canonical va_list for
   TYPE is the simple char-pointer flavor (MS ABI) rather than the SysV
   record.  */
8756 is_va_list_char_pointer (tree type)
8760 /* For 32-bit it is always true. */
8763 canonic = ix86_canonical_va_list_type (type);
8764 return (canonic == ms_va_list_type_node
8765 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8768 /* Implement va_start. */
/* NOTE(review): excerpt — return type, braces, several local declarations
   (type, ovf_rtx, va_r, next, reg, seq), sequence start/end calls, and the
   field arguments of the four build3 calls are elided.  Initializes the four
   fields of the SysV x86-64 va_list from the current function's argument
   bookkeeping.  */
8771 ix86_va_start (tree valist, rtx nextarg)
8773 HOST_WIDE_INT words, n_gpr, n_fpr;
8774 tree f_gpr, f_fpr, f_ovf, f_sav;
8775 tree gpr, fpr, ovf, sav, t;
8779 if (flag_split_stack
8780 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8782 unsigned int scratch_regno;
8784 /* When we are splitting the stack, we can't refer to the stack
8785 arguments using internal_arg_pointer, because they may be on
8786 the old stack. The split stack prologue will arrange to
8787 leave a pointer to the old stack arguments in a scratch
8788 register, which we here copy to a pseudo-register. The split
8789 stack prologue can't set the pseudo-register directly because
8790 it (the prologue) runs before any registers have been saved. */
8792 scratch_regno = split_stack_prologue_scratch_regno ();
8793 if (scratch_regno != INVALID_REGNUM)
8798 reg = gen_reg_rtx (Pmode);
8799 cfun->machine->split_stack_varargs_pointer = reg;
8802 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
/* Insert the copy at function entry so it precedes all other uses.  */
8806 push_topmost_sequence ();
8807 emit_insn_after (seq, entry_of_function ());
8808 pop_topmost_sequence ();
8812 /* Only 64bit target needs something special. */
8813 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8815 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8816 std_expand_builtin_va_start (valist, nextarg);
8821 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8822 next = expand_binop (ptr_mode, add_optab,
8823 cfun->machine->split_stack_varargs_pointer,
8824 crtl->args.arg_offset_rtx,
8825 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8826 convert_move (va_r, next, 0);
8828 /* Store zero bounds for va_list. */
8829 if (chkp_function_instrumented_p (current_function_decl))
8830 chkp_expand_bounds_reset_for_mem (valist,
8831 make_tree (TREE_TYPE (valist),
8838 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8839 f_fpr = DECL_CHAIN (f_gpr);
8840 f_ovf = DECL_CHAIN (f_fpr);
8841 f_sav = DECL_CHAIN (f_ovf);
8843 valist = build_simple_mem_ref (valist);
8844 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8845 /* The following should be folded into the MEM_REF offset. */
8846 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8848 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8850 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8852 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8855 /* Count number of gp and fp argument registers used. */
8856 words = crtl->args.info.words;
8857 n_gpr = crtl->args.info.regno;
8858 n_fpr = crtl->args.info.sse_regno;
8860 if (cfun->va_list_gpr_size)
/* gp_offset = bytes of GP register area already consumed by named args.  */
8862 type = TREE_TYPE (gpr);
8863 t = build2 (MODIFY_EXPR, type,
8864 gpr, build_int_cst (type, n_gpr * 8));
8865 TREE_SIDE_EFFECTS (t) = 1;
8866 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8869 if (TARGET_SSE && cfun->va_list_fpr_size)
/* fp_offset starts after the full GP area (8 * REGPARM_MAX bytes).  */
8871 type = TREE_TYPE (fpr);
8872 t = build2 (MODIFY_EXPR, type, fpr,
8873 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8874 TREE_SIDE_EFFECTS (t) = 1;
8875 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8878 /* Find the overflow area. */
8879 type = TREE_TYPE (ovf);
8880 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8881 ovf_rtx = crtl->args.internal_arg_pointer;
8883 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8884 t = make_tree (type, ovf_rtx);
8886 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8888 /* Store zero bounds for overflow area pointer. */
8889 if (chkp_function_instrumented_p (current_function_decl))
8890 chkp_expand_bounds_reset_for_mem (ovf, t);
8892 t = build2 (MODIFY_EXPR, type, ovf, t);
8893 TREE_SIDE_EFFECTS (t) = 1;
8894 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8896 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8898 /* Find the register save area.
8899 Prologue of the function saves it right above stack frame. */
8900 type = TREE_TYPE (sav);
8901 t = make_tree (type, frame_pointer_rtx);
8902 if (!ix86_varargs_gpr_size)
8903 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8905 /* Store zero bounds for save area pointer. */
8906 if (chkp_function_instrumented_p (current_function_decl))
8907 chkp_expand_bounds_reset_for_mem (sav, t);
8909 t = build2 (MODIFY_EXPR, type, sav, t);
8910 TREE_SIDE_EFFECTS (t) = 1;
8911 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8915 /* Implement va_arg. */
/* NOTE(review): excerpt — return type, the post_p parameter line, braces,
   many local declarations (container, addr, ptrtype, indirect_p, size, rsize,
   need_temp, t2, copy, src, src_addr, src_offset, piece_type, addr_type,
   daddr_type, i), and numerous control-flow lines are elided.  Gimplifies a
   VA_ARG_EXPR for the SysV x86-64 ABI: try the register save area first,
   fall back to the overflow (stack) area.  Left byte-identical: the logic is
   too order-sensitive and too heavily elided to restyle safely.  */
8918 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8921 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8922 tree f_gpr, f_fpr, f_ovf, f_sav;
8923 tree gpr, fpr, ovf, sav, t;
8925 tree lab_false, lab_over = NULL_TREE;
8930 machine_mode nat_mode;
8931 unsigned int arg_boundary;
8933 /* Only 64bit target needs something special. */
8934 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8935 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8937 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8938 f_fpr = DECL_CHAIN (f_gpr);
8939 f_ovf = DECL_CHAIN (f_fpr);
8940 f_sav = DECL_CHAIN (f_ovf);
8942 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8943 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8944 valist = build_va_arg_indirect_ref (valist);
8945 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8946 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8947 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Arguments passed by reference are fetched as a pointer instead.  */
8949 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8951 type = build_pointer_type (type);
8952 size = int_size_in_bytes (type);
8953 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8955 nat_mode = type_natural_mode (type, NULL, false);
8970 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
8971 if (!TARGET_64BIT_MS_ABI)
8978 container = construct_container (nat_mode, TYPE_MODE (type),
8979 type, 0, X86_64_REGPARM_MAX,
8980 X86_64_SSE_REGPARM_MAX, intreg,
8985 /* Pull the value out of the saved registers. */
8987 addr = create_tmp_var (ptr_type_node, "addr");
8991 int needed_intregs, needed_sseregs;
8993 tree int_addr, sse_addr;
8995 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8996 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8998 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
9000 need_temp = (!REG_P (container)
9001 && ((needed_intregs && TYPE_ALIGN (type) > 64)
9002 || TYPE_ALIGN (type) > 128));
9004 /* In case we are passing structure, verify that it is consecutive block
9005 on the register save area. If not we need to do moves. */
9006 if (!need_temp && !REG_P (container))
9008 /* Verify that all registers are strictly consecutive */
9009 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
9013 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9015 rtx slot = XVECEXP (container, 0, i);
9016 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9017 || INTVAL (XEXP (slot, 1)) != i * 16)
9025 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9027 rtx slot = XVECEXP (container, 0, i);
9028 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9029 || INTVAL (XEXP (slot, 1)) != i * 8)
9041 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9042 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9045 /* First ensure that we fit completely in registers. */
/* If gp_offset/fp_offset would run past the save area, jump to lab_false
   and take the value from the overflow (stack) area instead.  */
9048 t = build_int_cst (TREE_TYPE (gpr),
9049 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9050 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9051 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9052 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9053 gimplify_and_add (t, pre_p);
9057 t = build_int_cst (TREE_TYPE (fpr),
9058 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9059 + X86_64_REGPARM_MAX * 8);
9060 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9061 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9062 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9063 gimplify_and_add (t, pre_p);
9066 /* Compute index to start of area used for integer regs. */
9069 /* int_addr = gpr + sav; */
9070 t = fold_build_pointer_plus (sav, gpr);
9071 gimplify_assign (int_addr, t, pre_p);
9075 /* sse_addr = fpr + sav; */
9076 t = fold_build_pointer_plus (sav, fpr);
9077 gimplify_assign (sse_addr, t, pre_p);
9081 int i, prev_size = 0;
9082 tree temp = create_tmp_var (type, "va_arg_tmp");
/* Value straddles register classes or is misaligned: assemble it piece
   by piece into a stack temporary.  */
9085 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9086 gimplify_assign (addr, t, pre_p);
9088 for (i = 0; i < XVECLEN (container, 0); i++)
9090 rtx slot = XVECEXP (container, 0, i);
9091 rtx reg = XEXP (slot, 0);
9092 machine_mode mode = GET_MODE (reg);
9098 tree dest_addr, dest;
9099 int cur_size = GET_MODE_SIZE (mode);
9101 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9102 prev_size = INTVAL (XEXP (slot, 1));
9103 if (prev_size + cur_size > size)
9105 cur_size = size - prev_size;
9106 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9107 if (mode == BLKmode)
9110 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9111 if (mode == GET_MODE (reg))
9112 addr_type = build_pointer_type (piece_type);
9114 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9116 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9119 if (SSE_REGNO_P (REGNO (reg)))
9121 src_addr = sse_addr;
9122 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9126 src_addr = int_addr;
9127 src_offset = REGNO (reg) * 8;
9129 src_addr = fold_convert (addr_type, src_addr);
9130 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9132 dest_addr = fold_convert (daddr_type, addr);
9133 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9134 if (cur_size == GET_MODE_SIZE (mode))
9136 src = build_va_arg_indirect_ref (src_addr);
9137 dest = build_va_arg_indirect_ref (dest_addr);
9139 gimplify_assign (dest, src, pre_p);
9144 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9145 3, dest_addr, src_addr,
9146 size_int (cur_size));
9147 gimplify_and_add (copy, pre_p);
9149 prev_size += cur_size;
/* Consume the registers we used: bump gp_offset/fp_offset.  */
9155 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9156 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9157 gimplify_assign (gpr, t, pre_p);
9162 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9163 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9164 gimplify_assign (fpr, t, pre_p);
9167 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9169 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9172 /* ... otherwise out of the overflow area. */
9174 /* When we align parameter on stack for caller, if the parameter
9175 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
9176 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
9177 here with caller. */
9178 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9179 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9180 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9182 /* Care for on-stack alignment if needed. */
9183 if (arg_boundary <= 64 || size == 0)
9187 HOST_WIDE_INT align = arg_boundary / 8;
9188 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9189 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9190 build_int_cst (TREE_TYPE (t), -align));
9193 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9194 gimplify_assign (addr, t, pre_p);
/* Advance the overflow pointer past the consumed (word-rounded) slot.  */
9196 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9197 gimplify_assign (unshare_expr (ovf), t, pre_p);
9200 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9202 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9203 addr = fold_convert (ptrtype, addr);
9206 addr = build_va_arg_indirect_ref (addr);
9207 return build_va_arg_indirect_ref (addr);
9210 /* Return true if OPNUM's MEM should be matched
9211 in movabs* patterns. */
/* NOTE(review): excerpt — return type, braces, and the set/mem declarations
   are elided.  Digs the MEM operand out of the insn pattern (through any
   PARALLEL wrapper and SUBREGs) and rejects volatile memory unless
   volatile_ok is set.  */
9214 ix86_check_movabs (rtx insn, int opnum)
9218 set = PATTERN (insn);
9219 if (GET_CODE (set) == PARALLEL)
9220 set = XVECEXP (set, 0, 0);
9221 gcc_assert (GET_CODE (set) == SET);
9222 mem = XEXP (set, opnum);
9223 while (GET_CODE (mem) == SUBREG)
9224 mem = SUBREG_REG (mem);
9225 gcc_assert (MEM_P (mem));
9226 return volatile_ok || !MEM_VOLATILE_P (mem);
9229 /* Initialize the table of extra 80387 mathematical constants. */
/* NOTE(review): excerpt — return type, braces, and the int i declaration are
   elided.  Parses the decimal strings for log10(2), ln(2), log2(e), log2(10)
   and pi — the constants the x87 fld* opcodes can load directly — into
   ext_80387_constants_table.  */
9232 init_ext_80387_constants (void)
9234 static const char * cst[5] =
9236 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9237 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9238 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9239 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9240 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9244 for (i = 0; i < 5; i++)
9246 real_from_string (&ext_80387_constants_table[i], cst[i]);
9247 /* Ensure each constant is rounded to XFmode precision. */
9248 real_convert (&ext_80387_constants_table[i],
9249 XFmode, &ext_80387_constants_table[i]);
9252 ext_80387_constants_init = 1;
9255 /* Return non-zero if the constant is something that
9256 can be loaded with a special instruction. */
/* NOTE(review): excerpt — return type, braces, the REAL_VALUE_TYPE r and
   int i declarations, the XFmode condition line, and several return
   statements are elided.  Classifies X: presumably 1 for +0.0 (fldz), 2 for
   +1.0 (fld1), 3..7 for the table constants, and negative codes for
   -0.0/-1.0 — confirm against the switch in standard_80387_constant_opcode
   in the full source.  */
9259 standard_80387_constant_p (rtx x)
9261 machine_mode mode = GET_MODE (x);
9265 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
9268 if (x == CONST0_RTX (mode))
9270 if (x == CONST1_RTX (mode))
9273 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9275 /* For XFmode constants, try to find a special 80387 instruction when
9276 optimizing for size or on those CPUs that benefit from them. */
9278 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
9282 if (! ext_80387_constants_init)
9283 init_ext_80387_constants ();
9285 for (i = 0; i < 5; i++)
9286 if (real_identical (&r, &ext_80387_constants_table[i]))
9290 /* Load of the constant -0.0 or -1.0 will be split as
9291 fldz;fchs or fld1;fchs sequence. */
9292 if (real_isnegzero (&r))
9294 if (real_identical (&r, &dconstm1))
9300 /* Return the opcode of the special instruction to be used to load
/* NOTE(review): excerpt — the rest of the header comment, return type, braces
   and all switch cases (presumably mapping standard_80387_constant_p codes to
   fldz/fld1/fldlg2/... mnemonics) are elided.  */
9304 standard_80387_constant_opcode (rtx x)
9306 switch (standard_80387_constant_p (x))
9330 /* Return the CONST_DOUBLE representing the 80387 constant that is
9331 loaded by the specified special instruction. The argument IDX
9332 matches the return value from standard_80387_constant_p. */
/* NOTE(review): excerpt — return type, braces, the i declaration, and the
   switch mapping IDX to a table index are elided.  */
9335 standard_80387_constant_rtx (int idx)
9339 if (! ext_80387_constants_init)
9340 init_ext_80387_constants ();
9356 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9360 /* Return 1 if X is all 0s and 2 if x is all 1s
9361 in supported SSE/AVX vector mode. */
/* NOTE(review): excerpt — return type, braces, the return values for each
   test, and the trailing "return 0" are elided.  */
9364 standard_sse_constant_p (rtx x)
9366 machine_mode mode = GET_MODE (x);
9368 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
9370 if (vector_all_ones_operand (x, mode))
9398 /* Return the opcode of the special instruction to be used to load
/* NOTE(review): excerpt — the rest of the header comment, return type,
   braces, the case labels of both switches (the insn's attr mode selects the
   xor/pcmpeq variant), and the gcc_unreachable tail are elided.  Emits the
   idiomatic register-clearing xor for all-zeros and pcmpeq/vpternlog for
   all-ones, picking EVEX/VEX/legacy encodings by mode and ISA flags.  */
9402 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9404 switch (standard_sse_constant_p (x))
9407 switch (get_attr_mode (insn))
9410 return "vpxord\t%g0, %g0, %g0";
9412 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9413 : "vpxord\t%g0, %g0, %g0";
9415 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9416 : "vpxorq\t%g0, %g0, %g0";
9418 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9419 : "%vpxor\t%0, %d0";
9421 return "%vxorpd\t%0, %d0";
9423 return "%vxorps\t%0, %d0";
9426 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9427 : "vpxor\t%x0, %x0, %x0";
9429 return "vxorpd\t%x0, %x0, %x0";
9431 return "vxorps\t%x0, %x0, %x0";
9439 || get_attr_mode (insn) == MODE_XI
9440 || get_attr_mode (insn) == MODE_V8DF
9441 || get_attr_mode (insn) == MODE_V16SF)
9442 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9444 return "vpcmpeqd\t%0, %0, %0";
9446 return "pcmpeqd\t%0, %0";
9454 /* Returns true if OP contains a symbol reference */
/* NOTE(review): excerpt — return type, braces, fmt/i/j declarations, the
   'E' (rtvec) format check, and the final "return false" are elided.
   Recursive walk over the RTL expression looking for SYMBOL_REF or
   LABEL_REF anywhere inside OP.  */
9457 symbolic_reference_mentioned_p (rtx op)
9462 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9465 fmt = GET_RTX_FORMAT (GET_CODE (op));
9466 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9472 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9473 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9477 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9484 /* Return true if it is appropriate to emit `ret' instructions in the
9485 body of a function. Do this only if the epilogue is simple, needing a
9486 couple of insns. Prior to reloading, we can't tell how many registers
9487 must be saved, so return false then. Return false if there is no frame
9488 marker to de-allocate. */
/* NOTE(review): excerpt — return type, braces, and the "return 0" bodies of
   the two early-out conditions are elided.  */
9491 ix86_can_use_return_insn_p (void)
9493 struct ix86_frame frame;
9495 if (! reload_completed || frame_pointer_needed)
9498 /* Don't allow more than 32k pop, since that's all we can do
9499 with one instruction. */
9500 if (crtl->args.pops_args && crtl->args.size >= 32768)
9503 ix86_compute_frame_layout (&frame);
/* A bare `ret' works only when the frame is nothing but the return
   address and no call-saved registers were spilled.  */
9504 return (frame.stack_pointer_offset == UNITS_PER_WORD
9505 && (frame.nregs + frame.nsseregs) == 0);
9508 /* Value should be nonzero if functions must have frame pointers.
9509 Zero means the frame pointer need not be set up (and parms may
9510 be accessed via the stack pointer) in functions that seem suitable. */
/* NOTE(review): excerpt — return type, braces, the "return true/false" lines
   after each condition, part of the TARGET_OMIT_LEAF_FRAME_POINTER condition,
   and the final return are elided.  */
9513 ix86_frame_pointer_required (void)
9515 /* If we accessed previous frames, then the generated code expects
9516 to be able to access the saved ebp value in our frame. */
9517 if (cfun->machine->accesses_prev_frame)
9520 /* Several x86 os'es need a frame pointer for other reasons,
9521 usually pertaining to setjmp. */
9522 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9525 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
9526 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9529 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
9530 allocation is 4GB. */
9531 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9534 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9535 turns off the frame pointer by default. Turn it back on now if
9536 we've not got a leaf function. */
9537 if (TARGET_OMIT_LEAF_FRAME_POINTER
9539 || ix86_current_function_calls_tls_descriptor))
/* Profiling without -mfentry needs the frame chain for mcount.  */
9542 if (crtl->profile && !flag_fentry)
9548 /* Record that the current function accesses previous call frames.
   Setting this flag forces ix86_frame_pointer_required to keep %ebp/%rbp
   (see above), so __builtin_frame_address/__builtin_return_address work. */
/* NOTE(review): excerpt — return type and braces elided.  */
9551 ix86_setup_frame_addresses (void)
9553 cfun->machine->accesses_prev_frame = 1;
/* USE_HIDDEN_LINKONCE: emit the PC-thunk helpers as hidden COMDAT functions
   when the assembler supports .hidden and one-only sections; otherwise fall
   back to plain local labels.  NOTE(review): the #else/#endif lines are
   elided in this excerpt.  */
9556 #ifndef USE_HIDDEN_LINKONCE
9557 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9558 # define USE_HIDDEN_LINKONCE 1
9560 # define USE_HIDDEN_LINKONCE 0
/* Bitmask of registers for which a get_pc_thunk has been requested; bit N
   set means the thunk for register N must be emitted by ix86_code_end.  */
9564 static int pic_labels_used;
9566 /* Fills in the label name that should be used for a pc thunk for
9567 the given register. */
/* NOTE(review): excerpt — return type, braces, and the else keyword are
   elided.  NAME must hold at least 32 bytes (per the declared parameter).  */
9570 get_pc_thunk_name (char name[32], unsigned int regno)
9572 gcc_assert (!TARGET_64BIT);
9574 if (USE_HIDDEN_LINKONCE)
9575 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9577 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9581 /* This function generates code for -fpic that loads %ebx with
9582 the return address of the caller and then returns. */
/* NOTE(review): excerpt — return type, braces, locals (regno, name[32], decl,
   xops[2], i), TARGET_MACHO conditionals, the Darwin else branch, and the
   NOP-padding loop header are elided.  Emits, at end of compilation, one
   __x86.get_pc_thunk.<reg> helper per register recorded in
   pic_labels_used.  */
9585 ix86_code_end (void)
9590 for (regno = AX_REG; regno <= SP_REG; regno++)
9595 if (!(pic_labels_used & (1 << regno)))
9598 get_pc_thunk_name (name, regno);
/* Build a minimal FUNCTION_DECL so sections/symbols are set up normally.  */
9600 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9601 get_identifier (name),
9602 build_function_type_list (void_type_node, NULL_TREE));
9603 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9604 NULL_TREE, void_type_node);
9605 TREE_PUBLIC (decl) = 1;
9606 TREE_STATIC (decl) = 1;
9607 DECL_IGNORED_P (decl) = 1;
9612 switch_to_section (darwin_sections[text_coal_section]);
9613 fputs ("\t.weak_definition\t", asm_out_file);
9614 assemble_name (asm_out_file, name);
9615 fputs ("\n\t.private_extern\t", asm_out_file);
9616 assemble_name (asm_out_file, name);
9617 putc ('\n', asm_out_file);
9618 ASM_OUTPUT_LABEL (asm_out_file, name);
9619 DECL_WEAK (decl) = 1;
9623 if (USE_HIDDEN_LINKONCE)
9625 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9627 targetm.asm_out.unique_section (decl, 0);
9628 switch_to_section (get_named_section (decl, NULL, 0));
9630 targetm.asm_out.globalize_label (asm_out_file, name);
9631 fputs ("\t.hidden\t", asm_out_file);
9632 assemble_name (asm_out_file, name);
9633 putc ('\n', asm_out_file);
9634 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9638 switch_to_section (text_section);
9639 ASM_OUTPUT_LABEL (asm_out_file, name);
9642 DECL_INITIAL (decl) = make_node (BLOCK);
9643 current_function_decl = decl;
9644 init_function_start (decl);
9645 first_function_block_is_cold = false;
9646 /* Make sure unwind info is emitted for the thunk if needed. */
9647 final_start_function (emit_barrier (), asm_out_file, 1);
9649 /* Pad stack IP move with 4 instructions (two NOPs count
9650 as one instruction). */
9651 if (TARGET_PAD_SHORT_FUNCTION)
9656 fputs ("\tnop\n", asm_out_file);
/* Thunk body: load the return address (at the top of the stack) into the
   target register, then return.  */
9659 xops[0] = gen_rtx_REG (Pmode, regno);
9660 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9661 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9662 output_asm_insn ("%!ret", NULL);
9663 final_end_function ();
9664 init_insn_lengths ();
9665 free_after_compilation (cfun);
9667 current_function_decl = NULL;
9670 if (flag_split_stack)
9671 file_end_indicate_split_stack ();
9674 /* Emit code for the SET_GOT patterns. */
/* NOTE(review): excerpt — return type, braces, the xops declaration,
   TARGET_MACHO/deep-branch conditionals, the "return" statements after each
   strategy, and the final GOT-offset add setup are elided.  Materializes the
   GOT base address in DEST via the VxWorks RTP path, a mov of a label for
   non-PIC, or a call to the PC thunk.  */
9677 output_set_got (rtx dest, rtx label)
9683 if (TARGET_VXWORKS_RTP && flag_pic)
9685 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9686 xops[2] = gen_rtx_MEM (Pmode,
9687 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9688 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9690 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9691 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9692 an unadorned address. */
9693 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9694 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9695 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9699 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9704 /* We don't need a pic base, we're not producing pic. */
9707 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9708 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9709 targetm.asm_out.internal_label (asm_out_file, "L",
9710 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
/* PIC path: call the per-register thunk, which leaves the PC in DEST.  */
9715 get_pc_thunk_name (name, REGNO (dest));
9716 pic_labels_used |= 1 << REGNO (dest);
9718 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9719 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9720 output_asm_insn ("%!call\t%X2", xops);
9723 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9724 This is what will be referenced by the Mach-O PIC subsystem. */
9725 if (machopic_should_output_picbase_label () || !label)
9726 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9728 /* When we are restoring the pic base at the site of a nonlocal label,
9729 and we decided to emit the pic base above, we will still output a
9730 local label used for calculating the correction offset (even though
9731 the offset will be 0 in that case). */
9733 targetm.asm_out.internal_label (asm_out_file, "L",
9734 CODE_LABEL_NUMBER (label));
9739 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9744 /* Generate a "push" pattern for input ARG. */
/* NOTE(review): excerpt — the function signature line (presumably
   `static rtx gen_push (rtx arg)`), braces, and the final SET source operand
   line are elided.  Builds the RTL for pushing ARG and updates the frame
   state (CFA and SP offsets) to reflect the word-sized stack growth.  */
9749 struct machine_function *m = cfun->machine;
9751 if (m->fs.cfa_reg == stack_pointer_rtx)
9752 m->fs.cfa_offset += UNITS_PER_WORD;
9753 m->fs.sp_offset += UNITS_PER_WORD;
/* Pushes always operate on full words; widen sub-word registers.  */
9755 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9756 arg = gen_rtx_REG (word_mode, REGNO (arg));
9758 return gen_rtx_SET (VOIDmode,
9759 gen_rtx_MEM (word_mode,
9760 gen_rtx_PRE_DEC (Pmode,
9761 stack_pointer_rtx)),
9765 /* Generate a "pop" pattern for input ARG. */
/* NOTE(review): excerpt — the function signature line (presumably
   `static rtx gen_pop (rtx arg)`), braces, and the SET destination line are
   elided.  Mirror of gen_push: SET from a POST_INC stack memory reference.  */
9770 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9771 arg = gen_rtx_REG (word_mode, REGNO (arg));
9773 return gen_rtx_SET (VOIDmode,
9775 gen_rtx_MEM (word_mode,
9776 gen_rtx_POST_INC (Pmode,
9777 stack_pointer_rtx)));
9780 /* Return >= 0 if there is an unused call-clobbered register available
9781 for the entire function. */
/* NOTE(review): excerpt — return type, braces, the leaf-function condition
   preceding line 9791, the drap/i declarations, and the "return i" body of
   the search loop are elided.  Scans EAX/ECX/EDX (regs 0..2) for one that is
   never live and not the DRAP register.  */
9784 ix86_select_alt_pic_regnum (void)
9786 if (ix86_use_pseudo_pic_reg ())
9787 return INVALID_REGNUM;
9791 && !ix86_current_function_calls_tls_descriptor)
9794 /* Can't use the same register for both PIC and DRAP. */
9796 drap = REGNO (crtl->drap_reg);
9799 for (i = 2; i >= 0; --i)
9800 if (i != drap && !df_regs_ever_live_p (i))
9804 return INVALID_REGNUM;
9807 /* Return TRUE if we need to save REGNO. */
/* NOTE(review): excerpt — return type, braces, several return statements,
   the EH-register loop header, and the DRAP condition preceding line 9844
   are elided.  Decides whether the prologue must save REGNO: special-cases
   the PIC register, the EH return data registers, and the DRAP register
   before the generic live/call-used/fixed test.  */
9810 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9812 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
9813 && pic_offset_table_rtx)
9815 if (ix86_use_pseudo_pic_reg ())
9817 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
9818 _mcount in prologue. */
9819 if (!TARGET_64BIT && flag_pic && crtl->profile)
9822 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9824 || crtl->calls_eh_return
9825 || crtl->uses_const_pool
9826 || cfun->has_nonlocal_label)
9827 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9830 if (crtl->calls_eh_return && maybe_eh_return)
9835 unsigned test = EH_RETURN_DATA_REGNO (i);
9836 if (test == INVALID_REGNUM)
9844 && regno == REGNO (crtl->drap_reg)
9845 && !cfun->machine->no_drap_save_restore)
/* Default rule: save call-saved, non-fixed registers that are live,
   except the frame pointer when it is already maintained as such.  */
9848 return (df_regs_ever_live_p (regno)
9849 && !call_used_regs[regno]
9850 && !fixed_regs[regno]
9851 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9854 /* Return number of saved general purpose registers.  */
9857 ix86_nsaved_regs (void)
/* Count every non-SSE hard register that ix86_save_reg says must be
   preserved (including the eh_return data registers).  */
9862 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9863 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9868 /* Return number of saved SSE registers.  */
9871 ix86_nsaved_sseregs (void)
/* Only the 64-bit MS ABI has call-saved SSE registers; elsewhere the
   count is trivially zero.  */
9876 if (!TARGET_64BIT_MS_ABI)
9878 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9879 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9884 /* Given FROM and TO register numbers, say whether this elimination is
9885 allowed.  If stack alignment is needed, we can only replace argument
9886 pointer with hard frame pointer, or replace frame pointer with stack
9887 pointer.  Otherwise, frame pointer elimination is automatically
9888 handled and all other eliminations are valid.  */
9891 ix86_can_eliminate (const int from, const int to)
9893 if (stack_realign_fp)
9894 return ((from == ARG_POINTER_REGNUM
9895 && to == HARD_FRAME_POINTER_REGNUM)
9896 || (from == FRAME_POINTER_REGNUM
9897 && to == STACK_POINTER_REGNUM));
/* Without FP-based realignment: eliminating to SP is only possible when
   no frame pointer is required; everything else is always fine.  */
9899 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9902 /* Return the offset between two registers, one to be eliminated, and the other
9903 its replacement, at the start of a routine.  Offsets are read straight
9904 from the computed frame layout.  */
9906 ix86_initial_elimination_offset (int from, int to)
9908 struct ix86_frame frame;
9909 ix86_compute_frame_layout (&frame);
9911 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9912 return frame.hard_frame_pointer_offset;
9913 else if (from == FRAME_POINTER_REGNUM
9914 && to == HARD_FRAME_POINTER_REGNUM)
9915 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* Only the stack pointer remains as a valid elimination target.  */
9918 gcc_assert (to == STACK_POINTER_REGNUM);
9920 if (from == ARG_POINTER_REGNUM)
9921 return frame.stack_pointer_offset;
9923 gcc_assert (from == FRAME_POINTER_REGNUM);
9924 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9928 /* In a dynamically-aligned function, we can't know the offset from
9929 stack pointer to frame pointer, so we must ensure that setjmp
9930 eliminates fp against the hard fp (%ebp) rather than trying to
9931 index from %esp up to the top of the frame across a gap that is
9932 of unknown (at compile-time) size.  */
9934 ix86_builtin_setjmp_frame_value (void)
9936 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9939 /* When using -fsplit-stack, the allocation routines set a field in
9940 the TCB to the bottom of the stack plus this much space, measured
9943 #define SPLIT_STACK_AVAILABLE 256
9945 /* Fill structure ix86_frame about frame of currently computed function.
   Computes, top to bottom: number of saved GP/SSE registers, stack
   alignment requirements, the offsets of each save area, the frame and
   stack pointer offsets, the red-zone size and (on SEH targets) the
   biased hard-frame-pointer location.
   NOTE(review): numerous source lines are elided in this extract; the
   visible statements are kept byte-identical.  */
9948 ix86_compute_frame_layout (struct ix86_frame *frame)
9950 unsigned HOST_WIDE_INT stack_alignment_needed;
9951 HOST_WIDE_INT offset;
9952 unsigned HOST_WIDE_INT preferred_alignment;
9953 HOST_WIDE_INT size = get_frame_size ();
9954 HOST_WIDE_INT to_allocate;
9956 frame->nregs = ix86_nsaved_regs ();
9957 frame->nsseregs = ix86_nsaved_sseregs ();
9959 /* 64-bit MS ABI seem to require stack alignment to be always 16 except for
9960 function prologues and leaf.  */
9961 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9962 && (!crtl->is_leaf || cfun->calls_alloca != 0
9963 || ix86_current_function_calls_tls_descriptor))
9965 crtl->preferred_stack_boundary = 128;
9966 crtl->stack_alignment_needed = 128;
9968 /* preferred_stack_boundary is never updated for call
9969 expanded from tls descriptor.  Update it here.  We don't update it in
9970 expand stage because according to the comments before
9971 ix86_current_function_calls_tls_descriptor, tls calls may be optimized
9972 away in that stage.  */
9973 else if (ix86_current_function_calls_tls_descriptor
9974 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9976 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9977 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9978 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
/* Convert alignment requirements from bits to bytes.  */
9981 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9982 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9984 gcc_assert (!size || stack_alignment_needed);
9985 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9986 gcc_assert (preferred_alignment <= stack_alignment_needed);
9988 /* For SEH we have to limit the amount of code movement into the prologue.
9989 At present we do this via a BLOCKAGE, at which point there's very little
9990 scheduling that can be done, which means that there's very little point
9991 in doing anything except PUSHs.  */
9993 cfun->machine->use_fast_prologue_epilogue = false;
9995 /* During reload iteration the amount of registers saved can change.
9996 Recompute the value as needed.  Do not recompute when amount of registers
9997 didn't change as reload does multiple calls to the function and does not
9998 expect the decision to change within single iteration.  */
9999 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
10000 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
10002 int count = frame->nregs;
10003 struct cgraph_node *node = cgraph_node::get (current_function_decl);
10005 cfun->machine->use_fast_prologue_epilogue_nregs = count;
10007 /* The fast prologue uses move instead of push to save registers.  This
10008 is significantly longer, but also executes faster as modern hardware
10009 can execute the moves in parallel, but can't do that for push/pop.
10011 Be careful about choosing what prologue to emit:  When function takes
10012 many instructions to execute we may use slow version as well as in
10013 case function is known to be outside hot spot (this is known with
10014 feedback only).  Weight the size of function by number of registers
10015 to save as it is cheap to use one or two push instructions but very
10016 slow to use many of them.  */
10018 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
10019 if (node->frequency < NODE_FREQUENCY_NORMAL
10020 || (flag_branch_probabilities
10021 && node->frequency < NODE_FREQUENCY_HOT))
10022 cfun->machine->use_fast_prologue_epilogue = false;
10024 cfun->machine->use_fast_prologue_epilogue
10025 = !expensive_function_p (count);
10028 frame->save_regs_using_mov
10029 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
10030 /* If static stack checking is enabled and done with probes,
10031 the registers need to be saved before allocating the frame.  */
10032 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
10034 /* Skip return address.  */
10035 offset = UNITS_PER_WORD;
10037 /* Skip pushed static chain.  */
10038 if (ix86_static_chain_on_stack)
10039 offset += UNITS_PER_WORD;
10041 /* Skip saved base pointer.  */
10042 if (frame_pointer_needed)
10043 offset += UNITS_PER_WORD;
10044 frame->hfp_save_offset = offset;
10046 /* The traditional frame pointer location is at the top of the frame.  */
10047 frame->hard_frame_pointer_offset = offset;
10049 /* Register save area */
10050 offset += frame->nregs * UNITS_PER_WORD;
10051 frame->reg_save_offset = offset;
10053 /* On SEH target, registers are pushed just before the frame pointer
10054 location.  */
10056 frame->hard_frame_pointer_offset = offset;
10058 /* Align and set SSE register save area.  */
10059 if (frame->nsseregs)
10061 /* The only ABI that has saved SSE registers (Win64) also has a
10062 16-byte aligned default stack, and thus we don't need to be
10063 within the re-aligned local stack frame to save them.  */
10064 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
10065 offset = (offset + 16 - 1) & -16;
10066 offset += frame->nsseregs * 16;
10068 frame->sse_reg_save_offset = offset;
10070 /* The re-aligned stack starts here.  Values before this point are not
10071 directly comparable with values below this point.  In order to make
10072 sure that no value happens to be the same before and after, force
10073 the alignment computation below to add a non-zero value.  */
10074 if (stack_realign_fp)
10075 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
/* Varargs register save area (GPR + FPR spill for va_arg).  */
10078 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10079 offset += frame->va_arg_size;
10081 /* Align start of frame for local function.  */
10082 if (stack_realign_fp
10083 || offset != frame->sse_reg_save_offset
10086 || cfun->calls_alloca
10087 || ix86_current_function_calls_tls_descriptor)
10088 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10090 /* Frame pointer points here.  */
10091 frame->frame_pointer_offset = offset;
10095 /* Add outgoing arguments area.  Can be skipped if we eliminated
10096 all the function calls as dead code.
10097 Skipping is however impossible when function calls alloca.  Alloca
10098 expander assumes that last crtl->outgoing_args_size
10099 of stack frame are unused.  */
10100 if (ACCUMULATE_OUTGOING_ARGS
10101 && (!crtl->is_leaf || cfun->calls_alloca
10102 || ix86_current_function_calls_tls_descriptor))
10104 offset += crtl->outgoing_args_size;
10105 frame->outgoing_arguments_size = crtl->outgoing_args_size;
10108 frame->outgoing_arguments_size = 0;
10110 /* Align stack boundary.  Only needed if we're calling another function
10111 or using alloca.  */
10112 if (!crtl->is_leaf || cfun->calls_alloca
10113 || ix86_current_function_calls_tls_descriptor)
10114 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10116 /* We've reached end of stack frame.  */
10117 frame->stack_pointer_offset = offset;
10119 /* Size prologue needs to allocate.  */
10120 to_allocate = offset - frame->sse_reg_save_offset;
/* Small frames with few registers are cheaper with pushes; huge 64-bit
   frames can't use mov saves with a 32-bit displacement.  */
10122 if ((!to_allocate && frame->nregs <= 1)
10123 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10124 frame->save_regs_using_mov = false;
10126 if (ix86_using_red_zone ()
10127 && crtl->sp_is_unchanging
10129 && !ix86_current_function_calls_tls_descriptor)
10131 frame->red_zone_size = to_allocate;
10132 if (frame->save_regs_using_mov)
10133 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10134 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10135 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10138 frame->red_zone_size = 0;
10139 frame->stack_pointer_offset -= frame->red_zone_size;
10141 /* The SEH frame pointer location is near the bottom of the frame.
10142 This is enforced by the fact that the difference between the
10143 stack pointer and the frame pointer is limited to 240 bytes in
10144 the unwind data structure.  */
10147 HOST_WIDE_INT diff;
10149 /* If we can leave the frame pointer where it is, do so.  Also, returns
10150 the establisher frame for __builtin_frame_address (0).  */
10151 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10152 if (diff <= SEH_MAX_FRAME_SIZE
10153 && (diff > 240 || (diff & 15) != 0)
10154 && !crtl->accesses_prior_frames)
10156 /* Ideally we'd determine what portion of the local stack frame
10157 (within the constraint of the lowest 240) is most heavily used.
10158 But without that complication, simply bias the frame pointer
10159 by 128 bytes so as to maximize the amount of the local stack
10160 frame that is addressable with 8-bit offsets.  */
10161 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
10166 /* This is semi-inlined memory_address_length, but simplified
10167 since we know that we're always dealing with reg+offset, and
10168 to avoid having to create and discard all that rtl.
   Returns the extra encoding length (displacement + SIB bytes) for a
   reg+offset address using REGNO and OFFSET.
   NOTE(review): the signature and some branch lines are elided in this
   extract.  */
10171 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10177 /* EBP and R13 cannot be encoded without an offset.  */
10178 len = (regno == BP_REG || regno == R13_REG);
/* An 8-bit displacement suffices for offsets in [-128, 127].  */
10180 else if (IN_RANGE (offset, -128, 127))
10183 /* ESP and R12 must be encoded with a SIB byte.  */
10184 if (regno == SP_REG || regno == R12_REG)
10190 /* Return an RTX that points to CFA_OFFSET within the stack frame.
10191 The valid base registers are taken from CFUN->MACHINE->FS.
   With a fast prologue/epilogue, prefer the base most friendly to
   scheduling; otherwise prefer the shortest address encoding.  */
10194 choose_baseaddr (HOST_WIDE_INT cfa_offset)
10196 const struct machine_function *m = cfun->machine;
10197 rtx base_reg = NULL;
10198 HOST_WIDE_INT base_offset = 0;
10200 if (m->use_fast_prologue_epilogue)
10202 /* Choose the base register most likely to allow the most scheduling
10203 opportunities.  Generally FP is valid throughout the function,
10204 while DRAP must be reloaded within the epilogue.  But choose either
10205 over the SP due to increased encoding size.  */
10207 if (m->fs.fp_valid)
10209 base_reg = hard_frame_pointer_rtx;
10210 base_offset = m->fs.fp_offset - cfa_offset;
10212 else if (m->fs.drap_valid)
10214 base_reg = crtl->drap_reg;
10215 base_offset = 0 - cfa_offset;
10217 else if (m->fs.sp_valid)
10219 base_reg = stack_pointer_rtx;
10220 base_offset = m->fs.sp_offset - cfa_offset;
10225 HOST_WIDE_INT toffset;
10226 int len = 16, tlen;
10228 /* Choose the base register with the smallest address encoding.
10229 With a tie, choose FP > DRAP > SP.  */
10230 if (m->fs.sp_valid)
10232 base_reg = stack_pointer_rtx;
10233 base_offset = m->fs.sp_offset - cfa_offset;
10234 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10236 if (m->fs.drap_valid)
10238 toffset = 0 - cfa_offset;
10239 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
/* NOTE(review): the tlen-vs-len comparison lines are elided in this
   extract; only the winning-candidate assignments are visible.  */
10242 base_reg = crtl->drap_reg;
10243 base_offset = toffset;
10247 if (m->fs.fp_valid)
10249 toffset = m->fs.fp_offset - cfa_offset;
10250 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10253 base_reg = hard_frame_pointer_rtx;
10254 base_offset = toffset;
/* At least one of the frame-state base registers must be valid here.  */
10259 gcc_assert (base_reg != NULL);
10261 return plus_constant (Pmode, base_reg, base_offset);
10264 /* Emit code to save registers in the prologue.  Pushes each to-be-saved
   general-purpose register, highest register number first, and marks
   each push as frame-related for DWARF CFI.  */
10267 ix86_emit_save_regs (void)
10269 unsigned int regno;
/* Iterate downward so registers end up on the stack in a fixed,
   descending order.  */
10272 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10273 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10275 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10276 RTX_FRAME_RELATED_P (insn) = 1;
10280 /* Emit a single register save at CFA - CFA_OFFSET.  Emits the move,
   marks it frame-related, and attaches whatever CFA note the unwinder
   needs when the store address is not expressed against the current
   CFA register.  */
10283 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10284 HOST_WIDE_INT cfa_offset)
10286 struct machine_function *m = cfun->machine;
10287 rtx reg = gen_rtx_REG (mode, regno);
10288 rtx mem, addr, base, insn;
10290 addr = choose_baseaddr (cfa_offset);
10291 mem = gen_frame_mem (mode, addr);
10293 /* For SSE saves, we need to indicate the 128-bit alignment.  */
10294 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
10296 insn = emit_move_insn (mem, reg);
10297 RTX_FRAME_RELATED_P (insn) = 1;
/* Strip a possible (plus base offset) down to the base register.
   NOTE(review): the assignment of BASE (original 10298-10299) is elided
   in this extract.  */
10300 if (GET_CODE (base) == PLUS)
10301 base = XEXP (base, 0);
10302 gcc_checking_assert (REG_P (base));
10304 /* When saving registers into a re-aligned local stack frame, avoid
10305 any tricky guessing by dwarf2out.  */
10306 if (m->fs.realigned)
10308 gcc_checking_assert (stack_realign_drap);
10310 if (regno == REGNO (crtl->drap_reg))
10312 /* A bit of a hack.  We force the DRAP register to be saved in
10313 the re-aligned stack frame, which provides us with a copy
10314 of the CFA that will last past the prologue.  Install it.  */
10315 gcc_checking_assert (cfun->machine->fs.fp_valid);
10316 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10317 cfun->machine->fs.fp_offset - cfa_offset);
10318 mem = gen_rtx_MEM (mode, addr);
10319 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10323 /* The frame pointer is a stable reference within the
10324 aligned frame.  Use it.  */
10325 gcc_checking_assert (cfun->machine->fs.fp_valid);
10326 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10327 cfun->machine->fs.fp_offset - cfa_offset);
10328 mem = gen_rtx_MEM (mode, addr);
10329 add_reg_note (insn, REG_CFA_EXPRESSION,
10330 gen_rtx_SET (VOIDmode, mem, reg));
10334 /* The memory may not be relative to the current CFA register,
10335 which means that we may need to generate a new pattern for
10336 use by the unwind info.  */
10337 else if (base != m->fs.cfa_reg)
10339 addr = plus_constant (Pmode, m->fs.cfa_reg,
10340 m->fs.cfa_offset - cfa_offset);
10341 mem = gen_rtx_MEM (mode, addr);
10342 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
10346 /* Emit code to save registers using MOV insns.
10347 First register is stored at CFA - CFA_OFFSET; subsequent saves move
   one word further down (CFA_OFFSET decreases per register).  */
10349 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10351 unsigned int regno;
10353 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10354 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10356 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10357 cfa_offset -= UNITS_PER_WORD;
10361 /* Emit code to save SSE registers using MOV insns.
10362 First register is stored at CFA - CFA_OFFSET.  Saves are done in
   V4SFmode (full 128-bit vector stores).  */
10364 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10366 unsigned int regno;
10368 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10369 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10371 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
/* Queue of REG_CFA_RESTORE notes waiting for the next stack insn;
   GC-rooted because it can survive across passes.  */
10376 static GTY(()) rtx queued_cfa_restores;
10378 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
10379 manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
10380 Don't add the note if the previously saved value will be left untouched
10381 within stack red-zone till return, as unwinders can find the same value
10382 in the register and on the stack.  */
10385 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
/* Inside the red zone the stale stack slot stays valid until return,
   so no note is needed -- unless shrink-wrapping invalidated that.  */
10387 if (!crtl->shrink_wrapped
10388 && cfa_offset <= cfun->machine->fs.red_zone_offset)
/* NOTE(review): the INSN-vs-queue branch lines (original 10389-10392)
   are elided in this extract.  */
10393 add_reg_note (insn, REG_CFA_RESTORE, reg);
10394 RTX_FRAME_RELATED_P (insn) = 1;
10397 queued_cfa_restores
10398 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10401 /* Add queued REG_CFA_RESTORE notes if any to INSN, splicing the whole
   queue onto the front of INSN's note list and clearing the queue.  */
10404 ix86_add_queued_cfa_restore_notes (rtx insn)
10407 if (!queued_cfa_restores)
/* Find the tail of the queued-note chain so it can be linked ahead of
   INSN's existing notes.  */
10409 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10411 XEXP (last, 1) = REG_NOTES (insn);
10412 REG_NOTES (insn) = queued_cfa_restores;
10413 queued_cfa_restores = NULL_RTX;
10414 RTX_FRAME_RELATED_P (insn) = 1;
10417 /* Expand prologue or epilogue stack adjustment.
10418 The pattern exist to put a dependency on all ebp-based memory accesses.
10419 STYLE should be negative if instructions should be marked as frame related,
10420 zero if %r11 register is live and cannot be freely used and positive
10421 otherwise.  If SET_CFA, emit the CFA-adjust note and track the new CFA.
   NOTE(review): several source lines are elided in this extract.  */
10424 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10425 int style, bool set_cfa)
10427 struct machine_function *m = cfun->machine;
10429 bool add_frame_related_expr = false;
/* Pick the add pattern; a 64-bit offset that doesn't fit in a signed
   32-bit immediate must first be loaded into a scratch register.  */
10431 if (Pmode == SImode)
10432 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10433 else if (x86_64_immediate_operand (offset, DImode))
10434 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10438 /* r11 is used by indirect sibcall return as well, set before the
10439 epilogue and used after the epilogue.  */
10441 tmp = gen_rtx_REG (DImode, R11_REG);
/* Fall back to the hard frame pointer as scratch when r11 is live.  */
10444 gcc_assert (src != hard_frame_pointer_rtx
10445 && dest != hard_frame_pointer_rtx);
10446 tmp = hard_frame_pointer_rtx;
10448 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10450 add_frame_related_expr = true;
10452 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10455 insn = emit_insn (insn);
/* A stack-manipulation insn: flush any queued CFA restore notes.  */
10457 ix86_add_queued_cfa_restore_notes (insn);
10463 gcc_assert (m->fs.cfa_reg == src);
10464 m->fs.cfa_offset += INTVAL (offset);
10465 m->fs.cfa_reg = dest;
10467 r = gen_rtx_PLUS (Pmode, src, offset);
10468 r = gen_rtx_SET (VOIDmode, dest, r);
10469 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10470 RTX_FRAME_RELATED_P (insn) = 1;
10472 else if (style < 0)
10474 RTX_FRAME_RELATED_P (insn) = 1;
10475 if (add_frame_related_expr)
10477 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10478 r = gen_rtx_SET (VOIDmode, dest, r);
10479 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
/* Keep the frame-state machine's notion of SP validity/offset in sync
   with the adjustment just emitted.  */
10483 if (dest == stack_pointer_rtx)
10485 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10486 bool valid = m->fs.sp_valid;
10488 if (src == hard_frame_pointer_rtx)
10490 valid = m->fs.fp_valid;
10491 ooffset = m->fs.fp_offset;
10493 else if (src == crtl->drap_reg)
10495 valid = m->fs.drap_valid;
10500 /* Else there are two possibilities: SP itself, which we set
10501 up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
10502 taken care of this by hand along the eh_return path.  */
10503 gcc_checking_assert (src == stack_pointer_rtx
10504 || offset == const0_rtx);
10507 m->fs.sp_offset = ooffset - INTVAL (offset);
10508 m->fs.sp_valid = valid;
10512 /* Find an available register to be used as dynamic realign argument
10513 pointer register.  Such a register will be written in prologue and
10514 used in begin of body, so it must not be
10515 1. parameter passing register.
10516 2. GOT pointer.
10517 We reuse static-chain register if it is available.  Otherwise, we
10518 use DI for i386 and R13 for x86-64.  We chose R13 since it has
10519 shorter encoding.
10521 Return: the regno of chosen register.  */
10523 static unsigned int
10524 find_drap_reg (void)
10526 tree decl = cfun->decl;
/* NOTE(review): the TARGET_64BIT branch structure lines are elided in
   this extract; the two comment+check pairs below are the 64-bit and
   32-bit cases respectively.  */
10530 /* Use R13 for nested function or function need static chain.
10531 Since function with tail call may use any caller-saved
10532 registers in epilogue, DRAP must not use caller-saved
10533 register in such case.  */
10534 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10541 /* Use DI for nested function or function need static chain.
10542 Since function with tail call may use any caller-saved
10543 registers in epilogue, DRAP must not use caller-saved
10544 register in such case.  */
10545 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10548 /* Reuse static chain register if it isn't used for parameter
10549 passing.  */
10550 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
/* fastcall/thiscall use ecx for arguments, so it can't be the DRAP.  */
10552 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10553 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10560 /* Return minimum incoming stack alignment.  SIBCALL says whether the
   query is for a sibcall check (affects the -mstackrealign special
   case).  */
10562 static unsigned int
10563 ix86_minimum_incoming_stack_boundary (bool sibcall)
10565 unsigned int incoming_stack_boundary;
10567 /* Prefer the one specified at command line.  */
10568 if (ix86_user_incoming_stack_boundary)
10569 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10570 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
10571 if -mstackrealign is used, it isn't used for sibcall check and
10572 estimated stack alignment is 128bit.  */
10575 && ix86_force_align_arg_pointer
10576 && crtl->stack_alignment_estimated == 128)
10577 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10579 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10581 /* Incoming stack alignment can be changed on individual functions
10582 via force_align_arg_pointer attribute.  We use the smallest
10583 incoming stack boundary.  */
10584 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10585 && lookup_attribute (ix86_force_align_arg_pointer_string,
10586 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10587 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10589 /* The incoming stack frame has to be aligned at least at
10590 parm_stack_boundary.  */
10591 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10592 incoming_stack_boundary = crtl->parm_stack_boundary;
10594 /* Stack at entrance of main is aligned by runtime.  We use the
10595 smallest incoming stack boundary.  */
10596 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10597 && DECL_NAME (current_function_decl)
10598 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10599 && DECL_FILE_SCOPE_P (current_function_decl))
10600 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10602 return incoming_stack_boundary;
10605 /* Update incoming stack boundary and estimated stack alignment.  */
10608 ix86_update_stack_boundary (void)
10610 ix86_incoming_stack_boundary
10611 = ix86_minimum_incoming_stack_boundary (false);
10613 /* x86_64 vararg needs 16byte stack alignment for register save
10614 area.  NOTE(review): the condition's first clauses (original
   10615-10616) are elided in this extract.  */
10617 && crtl->stack_alignment_estimated < 128)
10618 crtl->stack_alignment_estimated = 128;
10621 /* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
10622 needed or an rtx for DRAP otherwise.  */
10625 ix86_get_drap_rtx (void)
/* Without accumulated outgoing args (or with -mforce-drap), DRAP is
   always requested when realignment happens.  */
10627 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10628 crtl->need_drap = true;
10630 if (stack_realign_drap)
10632 /* Assign DRAP to vDRAP and returns vDRAP */
10633 unsigned int regno = find_drap_reg ();
10636 rtx_insn *seq, *insn;
10638 arg_ptr = gen_rtx_REG (Pmode, regno);
10639 crtl->drap_reg = arg_ptr;
/* Copy the hard DRAP into a pseudo right after function entry so the
   body can reference a stable virtual DRAP.  */
10642 drap_vreg = copy_to_reg (arg_ptr);
10643 seq = get_insns ();
10646 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10649 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10650 RTX_FRAME_RELATED_P (insn) = 1;
10658 /* Handle the TARGET_INTERNAL_ARG_POINTER hook.  On i386 the incoming
   arguments are always addressed off the standard virtual rtx.  */
10661 ix86_internal_arg_pointer (void)
10663 return virtual_incoming_args_rtx;
/* Scratch-register descriptor handed between get_scratch_register_on_entry
   and release_scratch_register_on_entry.  NOTE(review): the struct's field
   lines (original 10667-10670) are elided in this extract.  */
10666 struct scratch_reg {
10671 /* Return a short-lived scratch register for use on function entry.
10672 In 32-bit mode, it is valid only after the registers are saved
10673 in the prologue.  This register must be released by means of
10674 release_scratch_register_on_entry once it is dead.  */
10677 get_scratch_register_on_entry (struct scratch_reg *sr)
10685 /* We always use R11 in 64-bit mode.  */
/* 32-bit mode: pick a register not used for argument passing, not the
   static chain, and not the DRAP register.  */
10690 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10692 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10694 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10695 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10696 int regparm = ix86_function_regparm (fntype, decl);
10698 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10700 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10701 for the static chain register.  */
10702 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10703 && drap_regno != AX_REG)
10705 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10706 for the static chain register.  */
10707 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10709 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10711 /* ecx is the static chain register.  */
10712 else if (regparm < 3 && !fastcall_p && !thiscall_p
10714 && drap_regno != CX_REG)
/* Fall back to callee-saved registers that are being saved anyway.  */
10716 else if (ix86_save_reg (BX_REG, true))
10718 /* esi is the static chain register.  */
10719 else if (!(regparm == 3 && static_chain_p)
10720 && ix86_save_reg (SI_REG, true))
10722 else if (ix86_save_reg (DI_REG, true))
10726 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10731 sr->reg = gen_rtx_REG (Pmode, regno);
/* When no free register exists, spill one with a push; the matching pop
   is in release_scratch_register_on_entry.  */
10734 rtx insn = emit_insn (gen_push (sr->reg));
10735 RTX_FRAME_RELATED_P (insn) = 1;
10739 /* Release a scratch register obtained from the preceding function.
   If the register had been spilled with a push, pop it back and record
   the stack adjustment for the unwinder by hand.  */
10742 release_scratch_register_on_entry (struct scratch_reg *sr)
10746 struct machine_function *m = cfun->machine;
10747 rtx x, insn = emit_insn (gen_pop (sr->reg));
10749 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
10750 RTX_FRAME_RELATED_P (insn) = 1;
10751 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10752 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10753 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10754 m->fs.sp_offset -= UNITS_PER_WORD;
10758 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10760 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
   Small constant sizes are fully unrolled; larger sizes use the
   adjust-and-probe loop pattern.  NOTE(review): some source lines are
   elided in this extract.  */
10763 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10765 /* We skip the probe for the first interval + a small dope of 4 words and
10766 probe that many bytes past the specified size to maintain a protection
10767 area at the bottom of the stack.  */
10768 const int dope = 4 * UNITS_PER_WORD;
10769 rtx size_rtx = GEN_INT (size), last;
10771 /* See if we have a constant small number of probes to generate.  If so,
10772 that's the easy case.  The run-time loop is made up of 11 insns in the
10773 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10774 for n # of intervals.  */
10775 if (size <= 5 * PROBE_INTERVAL)
10777 HOST_WIDE_INT i, adjust;
10778 bool first_probe = true;
10780 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10781 values of N from 1 until it exceeds SIZE.  If only one probe is
10782 needed, this will not generate any code.  Then adjust and probe
10783 to PROBE_INTERVAL + SIZE.  */
10784 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
/* The very first adjustment also covers the dope skipped above.  */
10788 adjust = 2 * PROBE_INTERVAL + dope;
10789 first_probe = false;
10792 adjust = PROBE_INTERVAL;
10794 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10795 plus_constant (Pmode, stack_pointer_rtx,
10797 emit_stack_probe (stack_pointer_rtx);
/* Final adjust+probe for the remainder past the last full interval.  */
10801 adjust = size + PROBE_INTERVAL + dope;
10803 adjust = size + PROBE_INTERVAL - i;
10805 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10806 plus_constant (Pmode, stack_pointer_rtx,
10808 emit_stack_probe (stack_pointer_rtx);
10810 /* Adjust back to account for the additional first interval.  */
10811 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10812 plus_constant (Pmode, stack_pointer_rtx,
10813 PROBE_INTERVAL + dope)));
10816 /* Otherwise, do the same as above, but in a loop.  Note that we must be
10817 extra careful with variables wrapping around because we might be at
10818 the very top (or the very bottom) of the address space and we have
10819 to be able to handle this case properly; in particular, we use an
10820 equality test for the loop condition.  */
10823 HOST_WIDE_INT rounded_size;
10824 struct scratch_reg sr;
10826 get_scratch_register_on_entry (&sr);
10829 /* Step 1: round SIZE to the previous multiple of the interval.  */
10831 rounded_size = size & -PROBE_INTERVAL;
10834 /* Step 2: compute initial and final value of the loop counter.  */
10836 /* SP = SP_0 + PROBE_INTERVAL.  */
10837 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10838 plus_constant (Pmode, stack_pointer_rtx,
10839 - (PROBE_INTERVAL + dope))));
10841 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE.  */
10842 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10843 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10844 gen_rtx_PLUS (Pmode, sr.reg,
10845 stack_pointer_rtx)));
10848 /* Step 3: the loop
10850 while (SP != LAST_ADDR)
10852 SP = SP + PROBE_INTERVAL
10853 probe at SP
10856 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10857 values of N from 1 until it is equal to ROUNDED_SIZE.  */
10859 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10862 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10863 assert at compile-time that SIZE is equal to ROUNDED_SIZE.  */
10865 if (size != rounded_size)
10867 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10868 plus_constant (Pmode, stack_pointer_rtx,
10869 rounded_size - size)));
10870 emit_stack_probe (stack_pointer_rtx);
10873 /* Adjust back to account for the additional first interval.  */
10874 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10875 plus_constant (Pmode, stack_pointer_rtx,
10876 PROBE_INTERVAL + dope)));
10878 release_scratch_register_on_entry (&sr);
/* The CFA must not be tracked against SP here, since the net SP motion
   below is described manually.  */
10881 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10883 /* Even if the stack pointer isn't the CFA register, we need to correctly
10884 describe the adjustments made to it, in particular differentiate the
10885 frame-related ones from the frame-unrelated ones.  */
10888 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10889 XVECEXP (expr, 0, 0)
10890 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10891 plus_constant (Pmode, stack_pointer_rtx, -size));
10892 XVECEXP (expr, 0, 1)
10893 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10894 plus_constant (Pmode, stack_pointer_rtx,
10895 PROBE_INTERVAL + dope + size));
10896 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10897 RTX_FRAME_RELATED_P (last) = 1;
10899 cfun->machine->fs.sp_offset += size;
10902 /* Make sure nothing is scheduled before we are done.  */
10903 emit_insn (gen_blockage ());
/* NOTE(review): this chunk is a sampled extract -- the embedded original
   line numbers jump (10909 -> 10911, etc.), so the return-type line, the
   braces and the declaration of the xops[] operand array are not visible
   here.  Comments annotate only what is visible.  */
10906 /* Adjust the stack pointer up to REG while probing it. */
/* Emits the literal assembly text for the moving-SP probe loop:
   decrement SP by PROBE_INTERVAL and touch the word at SP each
   iteration, until SP equals the last address held in REG.  */
10909 output_adjust_stack_and_probe (rtx reg)
/* Static counter so each emitted loop gets unique internal labels.  */
10911 static int labelno = 0;
10912 char loop_lab[32], end_lab[32];
10915 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10916 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10918 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10920 /* Jump to END_LAB if SP == LAST_ADDR. */
10921 xops[0] = stack_pointer_rtx;
/* NOTE(review): the line assigning xops[1] = reg appears to be dropped
   by the extraction (numbering skips 10922) -- confirm against the
   original source.  */
10923 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10924 fputs ("\tje\t", asm_out_file);
10925 assemble_name_raw (asm_out_file, end_lab);
10926 fputc ('\n', asm_out_file);
10928 /* SP = SP + PROBE_INTERVAL. */
10929 xops[1] = GEN_INT (PROBE_INTERVAL);
10930 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
/* Probe the freshly exposed word: OR of zero is a harmless write.  */
10933 xops[1] = const0_rtx;
10934 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10936 fprintf (asm_out_file, "\tjmp\t");
10937 assemble_name_raw (asm_out_file, loop_lab);
10938 fputc ('\n', asm_out_file);
10940 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
/* NOTE(review): sampled extract -- the embedded numbering jumps, so the
   return-type line, braces and some operand lines are missing from view.  */
10945 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10946 inclusive. These are offsets from the current stack pointer. */
10949 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10951 /* See if we have a constant small number of probes to generate. If so,
10952 that's the easy case. The run-time loop is made up of 7 insns in the
10953 generic case while the compile-time loop is made up of n insns for n #
10955 if (size <= 7 * PROBE_INTERVAL)
/* Unrolled case: emit one explicit probe per PROBE_INTERVAL step.  */
10959 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10960 it exceeds SIZE. If only one probe is needed, this will not
10961 generate any code. Then probe at FIRST + SIZE. */
10962 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10963 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
/* Final probe at the exact end of the range (offset arguments for both
   emit_stack_probe calls are on lines dropped by the extraction).  */
10966 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10970 /* Otherwise, do the same as above, but in a loop. Note that we must be
10971 extra careful with variables wrapping around because we might be at
10972 the very top (or the very bottom) of the address space and we have
10973 to be able to handle this case properly; in particular, we use an
10974 equality test for the loop condition. */
10977 HOST_WIDE_INT rounded_size, last;
10978 struct scratch_reg sr;
10980 get_scratch_register_on_entry (&sr);
/* Round down so the loop advances in whole PROBE_INTERVAL steps.  */
10983 /* Step 1: round SIZE to the previous multiple of the interval. */
10985 rounded_size = size & -PROBE_INTERVAL;
10988 /* Step 2: compute initial and final value of the loop counter. */
10990 /* TEST_OFFSET = FIRST. */
10991 emit_move_insn (sr.reg, GEN_INT (-first));
10993 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10994 last = first + rounded_size;
10997 /* Step 3: the loop
10999 while (TEST_ADDR != LAST_ADDR)
11001 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
11005 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
11006 until it is equal to ROUNDED_SIZE. */
11008 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
11011 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
11012 that SIZE is equal to ROUNDED_SIZE. */
11014 if (size != rounded_size)
11015 emit_stack_probe (plus_constant (Pmode,
11016 gen_rtx_PLUS (Pmode,
11019 rounded_size - size));
11021 release_scratch_register_on_entry (&sr);
/* Scheduling barrier: the probes must all land before later code.  */
11024 /* Make sure nothing is scheduled before we are done. */
11025 emit_insn (gen_blockage ());
/* NOTE(review): sampled extract -- return type, braces and the xops[]
   declaration / some operand assignments are missing from view.  */
11028 /* Probe a range of stack addresses from REG to END, inclusive. These are
11029 offsets from the current stack pointer. */
/* Companion to output_adjust_stack_and_probe: emits the probe loop as
   literal assembly text, advancing a test register (not SP itself).  */
11032 output_probe_stack_range (rtx reg, rtx end)
11034 static int labelno = 0;
11035 char loop_lab[32], end_lab[32];
11038 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11039 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11041 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11043 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
/* NOTE(review): lines assigning xops[0]/xops[1] before this compare were
   dropped by the extraction (numbering skips 11044-11045).  */
11046 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11047 fputs ("\tje\t", asm_out_file);
11048 assemble_name_raw (asm_out_file, end_lab);
11049 fputc ('\n', asm_out_file);
11051 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
11052 xops[1] = GEN_INT (PROBE_INTERVAL);
11053 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11055 /* Probe at TEST_ADDR. */
/* Probe address is SP plus the (negative) test offset register.  */
11056 xops[0] = stack_pointer_rtx;
11058 xops[2] = const0_rtx;
11059 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
11061 fprintf (asm_out_file, "\tjmp\t");
11062 assemble_name_raw (asm_out_file, loop_lab);
11063 fputc ('\n', asm_out_file);
11065 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
/* NOTE(review): sampled extract -- braces, the return-type line and
   several condition lines are missing (the embedded numbering jumps),
   so control-flow nesting below must be confirmed against the original.  */
11070 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
11071 to be generated in correct form. */
11073 ix86_finalize_stack_realign_flags (void)
11075 /* Check if stack realign is really needed after reload, and
11076 stores result in cfun */
11077 unsigned int incoming_stack_boundary
11078 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11079 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary)
11080 unsigned int stack_realign = (incoming_stack_boundary
11082 ? crtl->max_used_stack_slot_alignment
11083 : crtl->stack_alignment_needed));
/* Once finalized, the decision must not change on a second call.  */
11085 if (crtl->stack_realign_finalized)
11087 /* After stack_realign_needed is finalized, we can't no longer
11089 gcc_assert (crtl->stack_realign_needed == stack_realign);
11093 /* If the only reason for frame_pointer_needed is that we conservatively
11094 assumed stack realignment might be needed, but in the end nothing that
11095 needed the stack alignment had been spilled, clear frame_pointer_needed
11096 and say we don't need stack realignment. */
/* NOTE(review): the first condition of this conjunction (line 11097)
   was dropped by the extraction.  */
11098 && frame_pointer_needed
11100 && flag_omit_frame_pointer
11101 && crtl->sp_is_unchanging
11102 && !ix86_current_function_calls_tls_descriptor
11103 && !crtl->accesses_prior_frames
11104 && !cfun->calls_alloca
11105 && !crtl->calls_eh_return
11106 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
11107 && !ix86_frame_pointer_required ()
11108 && get_frame_size () == 0
11109 && ix86_nsaved_sseregs () == 0
11110 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
/* Scan the whole function: if any insn still requires a stack frame
   given what the prologue sets up, keep the conservative decision.  */
11112 HARD_REG_SET set_up_by_prologue, prologue_used;
11115 CLEAR_HARD_REG_SET (prologue_used);
11116 CLEAR_HARD_REG_SET (set_up_by_prologue);
11117 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11118 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11119 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11120 HARD_FRAME_POINTER_REGNUM);
11121 FOR_EACH_BB_FN (bb, cfun)
11124 FOR_BB_INSNS (bb, insn)
11125 if (NONDEBUG_INSN_P (insn)
11126 && requires_stack_frame_p (insn, prologue_used,
11127 set_up_by_prologue))
11129 crtl->stack_realign_needed = stack_realign;
11130 crtl->stack_realign_finalized = true;
11135 /* If drap has been set, but it actually isn't live at the start
11136 of the function, there is no reason to set it up. */
11137 if (crtl->drap_reg)
11139 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11140 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11142 crtl->drap_reg = NULL_RTX;
11143 crtl->need_drap = false;
11147 cfun->machine->no_drap_save_restore = true;
/* Nothing spilled needed the alignment: drop the frame pointer and
   shrink all recorded alignment requirements to the incoming boundary.  */
11149 frame_pointer_needed = false;
11150 stack_realign = false;
11151 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11152 crtl->stack_alignment_needed = incoming_stack_boundary;
11153 crtl->stack_alignment_estimated = incoming_stack_boundary;
11154 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11155 crtl->preferred_stack_boundary = incoming_stack_boundary;
/* Recompute DF info since the frame-pointer decision changed.  */
11156 df_finish_pass (true);
11157 df_scan_alloc (NULL);
11159 df_compute_regs_ever_live (true);
11163 crtl->stack_realign_needed = stack_realign;
11164 crtl->stack_realign_finalized = true;
/* NOTE(review): sampled extract -- return-type line and braces are
   missing from view (embedded numbering jumps).  */
11167 /* Delete SET_GOT right after entry block if it is allocated to reg. */
11170 ix86_elim_entry_set_got (rtx reg)
/* Look at the first real insn of the first block after the entry block.  */
11172 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11173 rtx_insn *c_insn = BB_HEAD (bb);
11174 if (!NONDEBUG_INSN_P (c_insn))
11175 c_insn = next_nonnote_nondebug_insn (c_insn);
11176 if (c_insn && NONJUMP_INSN_P (c_insn))
11178 rtx pat = PATTERN (c_insn);
11179 if (GET_CODE (pat) == PARALLEL)
/* A set_got pattern is a PARALLEL whose first element is a SET whose
   source is an UNSPEC_SET_GOT; delete it when its destination register
   matches REG (the caller is about to emit a fresh set_got).  */
11181 rtx vec = XVECEXP (pat, 0, 0);
11182 if (GET_CODE (vec) == SET
11183 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11184 && REGNO (XEXP (vec, 0)) == REGNO (reg))
11185 delete_insn (c_insn);
/* NOTE(review): sampled extract of ix86_expand_prologue -- the embedded
   original line numbers jump throughout, so braces, else-keywords,
   several declarations (insn, t, push, mov, r10, ...) and some condition
   lines are missing from view.  Added comments describe only the visible
   code; nesting must be confirmed against the original source.  */
11190 /* Expand the prologue into a bunch of separate insns. */
11193 ix86_expand_prologue (void)
11195 struct machine_function *m = cfun->machine;
11197 struct ix86_frame frame;
11198 HOST_WIDE_INT allocate;
11199 bool int_registers_saved;
11200 bool sse_registers_saved;
11202 ix86_finalize_stack_realign_flags ();
11204 /* DRAP should not coexist with stack_realign_fp */
11205 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
/* Reset the tracked frame state; it is rebuilt as insns are emitted.  */
11207 memset (&m->fs, 0, sizeof (m->fs));
11209 /* Initialize CFA state for before the prologue. */
11210 m->fs.cfa_reg = stack_pointer_rtx;
11211 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11213 /* Track SP offset to the CFA. We continue tracking this after we've
11214 swapped the CFA register away from SP. In the case of re-alignment
11215 this is fudged; we're interested to offsets within the local frame. */
11216 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11217 m->fs.sp_valid = true;
11219 ix86_compute_frame_layout (&frame);
11221 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11223 /* We should have already generated an error for any use of
11224 ms_hook on a nested function. */
11225 gcc_checking_assert (!ix86_static_chain_on_stack);
11227 /* Check if profiling is active and we shall use profiling before
11228 prologue variant. If so sorry. */
11229 if (crtl->profile && flag_fentry != 0)
11230 sorry ("ms_hook_prologue attribute isn%'t compatible "
11231 "with -mfentry for 32-bit");
11233 /* In ix86_asm_output_function_label we emitted:
11234 8b ff movl.s %edi,%edi
11236 8b ec movl.s %esp,%ebp
11238 This matches the hookable function prologue in Win32 API
11239 functions in Microsoft Windows XP Service Pack 2 and newer.
11240 Wine uses this to enable Windows apps to hook the Win32 API
11241 functions provided by Wine.
11243 What that means is that we've already set up the frame pointer. */
11245 if (frame_pointer_needed
11246 && !(crtl->drap_reg && crtl->stack_realign_needed))
11250 /* We've decided to use the frame pointer already set up.
11251 Describe this to the unwinder by pretending that both
11252 push and mov insns happen right here.
11254 Putting the unwind info here at the end of the ms_hook
11255 is done so that we can make absolutely certain we get
11256 the required byte sequence at the start of the function,
11257 rather than relying on an assembler that can produce
11258 the exact encoding required.
11260 However it does mean (in the unpatched case) that we have
11261 a 1 insn window where the asynchronous unwind info is
11262 incorrect. However, if we placed the unwind info at
11263 its correct location we would have incorrect unwind info
11264 in the patched case. Which is probably all moot since
11265 I don't expect Wine generates dwarf2 unwind info for the
11266 system libraries that use this feature. */
11268 insn = emit_insn (gen_blockage ());
11270 push = gen_push (hard_frame_pointer_rtx);
11271 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
11272 stack_pointer_rtx);
11273 RTX_FRAME_RELATED_P (push) = 1;
11274 RTX_FRAME_RELATED_P (mov) = 1;
11276 RTX_FRAME_RELATED_P (insn) = 1;
11277 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11278 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11280 /* Note that gen_push incremented m->fs.cfa_offset, even
11281 though we didn't emit the push insn here. */
11282 m->fs.cfa_reg = hard_frame_pointer_rtx;
11283 m->fs.fp_offset = m->fs.cfa_offset;
11284 m->fs.fp_valid = true;
11288 /* The frame pointer is not needed so pop %ebp again.
11289 This leaves us with a pristine state. */
11290 emit_insn (gen_pop (hard_frame_pointer_rtx));
11294 /* The first insn of a function that accepts its static chain on the
11295 stack is to push the register that would be filled in by a direct
11296 call. This insn will be skipped by the trampoline. */
11297 else if (ix86_static_chain_on_stack)
11299 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
11300 emit_insn (gen_blockage ());
11302 /* We don't want to interpret this push insn as a register save,
11303 only as a stack adjustment. The real copy of the register as
11304 a save will be done later, if needed. */
11305 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11306 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
11307 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11308 RTX_FRAME_RELATED_P (insn) = 1;
11311 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
11312 of DRAP is needed and stack realignment is really needed after reload */
11313 if (stack_realign_drap)
11315 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11317 /* Only need to push parameter pointer reg if it is caller saved. */
11318 if (!call_used_regs[REGNO (crtl->drap_reg)])
11320 /* Push arg pointer reg */
11321 insn = emit_insn (gen_push (crtl->drap_reg));
11322 RTX_FRAME_RELATED_P (insn) = 1;
11325 /* Grab the argument pointer. */
11326 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11327 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11328 RTX_FRAME_RELATED_P (insn) = 1;
/* From here on the DRAP register is the CFA.  */
11329 m->fs.cfa_reg = crtl->drap_reg;
11330 m->fs.cfa_offset = 0;
11332 /* Align the stack. */
11333 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11335 GEN_INT (-align_bytes)));
11336 RTX_FRAME_RELATED_P (insn) = 1;
11338 /* Replicate the return address on the stack so that return
11339 address can be reached via (argp - 1) slot. This is needed
11340 to implement macro RETURN_ADDR_RTX and intrinsic function
11341 expand_builtin_return_addr etc. */
11342 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11343 t = gen_frame_mem (word_mode, t);
11344 insn = emit_insn (gen_push (t));
11345 RTX_FRAME_RELATED_P (insn) = 1;
11347 /* For the purposes of frame and register save area addressing,
11348 we've started over with a new frame. */
11349 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11350 m->fs.realigned = true;
/* Nothing to save means "already saved" for the logic below.  */
11353 int_registers_saved = (frame.nregs == 0);
11354 sse_registers_saved = (frame.nsseregs == 0);
11356 if (frame_pointer_needed && !m->fs.fp_valid)
11358 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11359 slower on all targets. Also sdb doesn't like it. */
11360 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11361 RTX_FRAME_RELATED_P (insn) = 1;
11363 /* Push registers now, before setting the frame pointer
11365 if (!int_registers_saved
11367 && !frame.save_regs_using_mov)
11369 ix86_emit_save_regs ();
11370 int_registers_saved = true;
11371 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11374 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11376 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11377 RTX_FRAME_RELATED_P (insn) = 1;
11379 if (m->fs.cfa_reg == stack_pointer_rtx)
11380 m->fs.cfa_reg = hard_frame_pointer_rtx;
11381 m->fs.fp_offset = m->fs.sp_offset;
11382 m->fs.fp_valid = true;
11386 if (!int_registers_saved)
11388 /* If saving registers via PUSH, do so now. */
11389 if (!frame.save_regs_using_mov)
11391 ix86_emit_save_regs ();
11392 int_registers_saved = true;
11393 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11396 /* When using red zone we may start register saving before allocating
11397 the stack frame saving one cycle of the prologue. However, avoid
11398 doing this if we have to probe the stack; at least on x86_64 the
11399 stack probe can turn into a call that clobbers a red zone location. */
11400 else if (ix86_using_red_zone ()
11401 && (! TARGET_STACK_PROBE
11402 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11404 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11405 int_registers_saved = true;
11409 if (stack_realign_fp)
11411 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11412 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11414 /* The computation of the size of the re-aligned stack frame means
11415 that we must allocate the size of the register save area before
11416 performing the actual alignment. Otherwise we cannot guarantee
11417 that there's enough storage above the realignment point. */
11418 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11419 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11420 GEN_INT (m->fs.sp_offset
11421 - frame.sse_reg_save_offset),
11424 /* Align the stack. */
11425 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11427 GEN_INT (-align_bytes)));
11429 /* For the purposes of register save area addressing, the stack
11430 pointer is no longer valid. As for the value of sp_offset,
11431 see ix86_compute_frame_layout, which we need to match in order
11432 to pass verification of stack_pointer_offset at the end. */
11433 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11434 m->fs.sp_valid = false;
/* Remaining bytes of frame still to be allocated below SP.  */
11437 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11439 if (flag_stack_usage_info)
11441 /* We start to count from ARG_POINTER. */
11442 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11444 /* If it was realigned, take into account the fake frame. */
11445 if (stack_realign_drap)
11447 if (ix86_static_chain_on_stack)
11448 stack_size += UNITS_PER_WORD;
11450 if (!call_used_regs[REGNO (crtl->drap_reg)])
11451 stack_size += UNITS_PER_WORD;
11453 /* This over-estimates by 1 minimal-stack-alignment-unit but
11454 mitigates that by counting in the new return address slot. */
11455 current_function_dynamic_stack_size
11456 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11459 current_function_static_stack_size = stack_size;
11462 /* On SEH target with very large frame size, allocate an area to save
11463 SSE registers (as the very large allocation won't be described). */
11465 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11466 && !sse_registers_saved)
11468 HOST_WIDE_INT sse_size =
11469 frame.sse_reg_save_offset - frame.reg_save_offset;
11471 gcc_assert (int_registers_saved);
11473 /* No need to do stack checking as the area will be immediately
11475 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11476 GEN_INT (-sse_size), -1,
11477 m->fs.cfa_reg == stack_pointer_rtx);
11478 allocate -= sse_size;
11479 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11480 sse_registers_saved = true;
11483 /* The stack has already been decremented by the instruction calling us
11484 so probe if the size is non-negative to preserve the protection area. */
11485 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11487 /* We expect the registers to be saved when probes are used. */
11488 gcc_assert (int_registers_saved);
11490 if (STACK_CHECK_MOVING_SP)
11492 if (!(crtl->is_leaf && !cfun->calls_alloca
11493 && allocate <= PROBE_INTERVAL))
11495 ix86_adjust_stack_and_probe (allocate);
11501 HOST_WIDE_INT size = allocate;
/* Cap the probed size so it fits a signed 32-bit displacement.  */
11503 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11504 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11506 if (TARGET_STACK_PROBE)
11508 if (crtl->is_leaf && !cfun->calls_alloca)
11510 if (size > PROBE_INTERVAL)
11511 ix86_emit_probe_stack_range (0, size);
11514 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11518 if (crtl->is_leaf && !cfun->calls_alloca)
11520 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11521 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11522 size - STACK_CHECK_PROTECT);
11525 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
/* Small allocation: a plain SP subtraction suffices.  */
11532 else if (!ix86_target_stack_probe ()
11533 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11535 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11536 GEN_INT (-allocate), -1,
11537 m->fs.cfa_reg == stack_pointer_rtx);
/* Large allocation: call the ___chkstk-style worker with the size in
   EAX, spilling EAX (and R10 for the static chain) around the call.  */
11541 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11543 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11544 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11545 bool eax_live = ix86_eax_live_at_start_p ();
11546 bool r10_live = false;
11549 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11553 insn = emit_insn (gen_push (eax));
11554 allocate -= UNITS_PER_WORD;
11555 /* Note that SEH directives need to continue tracking the stack
11556 pointer even after the frame pointer has been set up. */
11557 if (sp_is_cfa_reg || TARGET_SEH)
11560 m->fs.cfa_offset += UNITS_PER_WORD;
11561 RTX_FRAME_RELATED_P (insn) = 1;
11562 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11563 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11564 plus_constant (Pmode, stack_pointer_rtx,
11565 -UNITS_PER_WORD)));
11571 r10 = gen_rtx_REG (Pmode, R10_REG);
11572 insn = emit_insn (gen_push (r10));
11573 allocate -= UNITS_PER_WORD;
11574 if (sp_is_cfa_reg || TARGET_SEH)
11577 m->fs.cfa_offset += UNITS_PER_WORD;
11578 RTX_FRAME_RELATED_P (insn) = 1;
11579 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11580 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11581 plus_constant (Pmode, stack_pointer_rtx,
11582 -UNITS_PER_WORD)));
11586 emit_move_insn (eax, GEN_INT (allocate));
11587 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11589 /* Use the fact that AX still contains ALLOCATE. */
11590 adjust_stack_insn = (Pmode == DImode
11591 ? gen_pro_epilogue_adjust_stack_di_sub
11592 : gen_pro_epilogue_adjust_stack_si_sub);
11594 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11595 stack_pointer_rtx, eax));
11597 if (sp_is_cfa_reg || TARGET_SEH)
11600 m->fs.cfa_offset += allocate;
11601 RTX_FRAME_RELATED_P (insn) = 1;
11602 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11603 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11604 plus_constant (Pmode, stack_pointer_rtx,
11607 m->fs.sp_offset += allocate;
11609 /* Use stack_pointer_rtx for relative addressing so that code
11610 works for realigned stack, too. */
11611 if (r10_live && eax_live)
11613 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11614 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11615 gen_frame_mem (word_mode, t));
11616 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11617 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11618 gen_frame_mem (word_mode, t));
11620 else if (eax_live || r10_live)
11622 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11623 emit_move_insn (gen_rtx_REG (word_mode,
11624 (eax_live ? AX_REG : R10_REG)),
11625 gen_frame_mem (word_mode, t));
11628 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11630 /* If we havn't already set up the frame pointer, do so now. */
11631 if (frame_pointer_needed && !m->fs.fp_valid)
11633 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11634 GEN_INT (frame.stack_pointer_offset
11635 - frame.hard_frame_pointer_offset));
11636 insn = emit_insn (insn);
11637 RTX_FRAME_RELATED_P (insn) = 1;
11638 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11640 if (m->fs.cfa_reg == stack_pointer_rtx)
11641 m->fs.cfa_reg = hard_frame_pointer_rtx;
11642 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11643 m->fs.fp_valid = true;
/* Any registers not yet saved by PUSH are saved via MOV now that the
   frame has been fully allocated.  */
11646 if (!int_registers_saved)
11647 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11648 if (!sse_registers_saved)
11649 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11651 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
11653 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11655 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11656 insn = emit_insn (gen_set_got (pic));
11657 RTX_FRAME_RELATED_P (insn) = 1;
11658 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11659 emit_insn (gen_prologue_use (pic));
11660 /* Deleting already emmitted SET_GOT if exist and allocated to
11661 REAL_PIC_OFFSET_TABLE_REGNUM. */
11662 ix86_elim_entry_set_got (pic);
11665 if (crtl->drap_reg && !crtl->stack_realign_needed)
11667 /* vDRAP is setup but after reload it turns out stack realign
11668 isn't necessary, here we will emit prologue to setup DRAP
11669 without stack realign adjustment */
11670 t = choose_baseaddr (0);
11671 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11674 /* Prevent instructions from being scheduled into register save push
11675 sequence when access to the redzone area is done through frame pointer.
11676 The offset between the frame pointer and the stack pointer is calculated
11677 relative to the value of the stack pointer at the end of the function
11678 prologue, and moving instructions that access redzone area via frame
11679 pointer inside push sequence violates this assumption. */
11680 if (frame_pointer_needed && frame.red_zone_size)
11681 emit_insn (gen_memory_blockage ());
11683 /* Emit cld instruction if stringops are used in the function. */
11684 if (TARGET_CLD && ix86_current_function_needs_cld)
11685 emit_insn (gen_cld ());
11687 /* SEH requires that the prologue end within 256 bytes of the start of
11688 the function. Prevent instruction schedules that would extend that.
11689 Further, prevent alloca modifications to the stack pointer from being
11690 combined with prologue modifications. */
11692 emit_insn (gen_prologue_use (stack_pointer_rtx))
/* NOTE(review): sampled extract -- return-type line and braces are
   missing from view (embedded numbering jumps).  */
11695 /* Emit code to restore REG using a POP insn. */
11698 ix86_emit_restore_reg_using_pop (rtx reg)
11700 struct machine_function *m = cfun->machine;
11701 rtx insn = emit_insn (gen_pop (reg));
/* Keep the tracked SP offset in sync with the pop just emitted.  */
11703 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11704 m->fs.sp_offset -= UNITS_PER_WORD;
11706 if (m->fs.cfa_reg == crtl->drap_reg
11707 && REGNO (reg) == REGNO (crtl->drap_reg))
11709 /* Previously we'd represented the CFA as an expression
11710 like *(%ebp - 8). We've just popped that value from
11711 the stack, which means we need to reset the CFA to
11712 the drap register. This will remain until we restore
11713 the stack pointer. */
11714 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11715 RTX_FRAME_RELATED_P (insn) = 1;
11717 /* This means that the DRAP register is valid for addressing too. */
11718 m->fs.drap_valid = true;
11722 if (m->fs.cfa_reg == stack_pointer_rtx)
/* SP is the CFA: record the pop as a CFA adjustment for unwinding.  */
11724 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11725 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11726 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11727 RTX_FRAME_RELATED_P (insn) = 1;
11729 m->fs.cfa_offset -= UNITS_PER_WORD;
11732 /* When the frame pointer is the CFA, and we pop it, we are
11733 swapping back to the stack pointer as the CFA. This happens
11734 for stack frames that don't allocate other data, so we assume
11735 the stack pointer is now pointing at the return address, i.e.
11736 the function entry state, which makes the offset be 1 word. */
11737 if (reg == hard_frame_pointer_rtx)
11739 m->fs.fp_valid = false;
11740 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11742 m->fs.cfa_reg = stack_pointer_rtx;
11743 m->fs.cfa_offset -= UNITS_PER_WORD;
11745 add_reg_note (insn, REG_CFA_DEF_CFA,
11746 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11747 GEN_INT (m->fs.cfa_offset)));
11748 RTX_FRAME_RELATED_P (insn) = 1;
/* NOTE(review): sampled extract -- return-type line and braces are
   missing from view.  */
11753 /* Emit code to restore saved registers using POP insns. */
11756 ix86_emit_restore_regs_using_pop (void)
11758 unsigned int regno;
/* Pop every saved general-purpose register (SSE registers are restored
   elsewhere via MOV).  */
11760 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11761 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11762 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
/* NOTE(review): sampled extract -- return-type line, braces and the
   offset operands of the final two calls are missing from view.  */
11765 /* Emit code and notes for the LEAVE instruction. */
11768 ix86_emit_leave (void)
11770 struct machine_function *m = cfun->machine;
11771 rtx insn = emit_insn (ix86_gen_leave ());
11773 ix86_add_queued_cfa_restore_notes (insn);
/* LEAVE requires a valid frame pointer; afterwards SP is valid again
   (one word below the old FP) and FP is not.  */
11775 gcc_assert (m->fs.fp_valid);
11776 m->fs.sp_valid = true;
11777 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11778 m->fs.fp_valid = false;
11780 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
/* CFA moves back from FP to SP; emit the matching unwind note.  */
11782 m->fs.cfa_reg = stack_pointer_rtx;
11783 m->fs.cfa_offset = m->fs.sp_offset;
11785 add_reg_note (insn, REG_CFA_DEF_CFA,
11786 plus_constant (Pmode, stack_pointer_rtx,
11788 RTX_FRAME_RELATED_P (insn) = 1;
11790 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
/* NOTE(review): sampled extract -- return-type line, braces and the
   declarations of rtx mem/insn are missing from view.  */
11794 /* Emit code to restore saved registers using MOV insns.
11795 First register is restored from CFA - CFA_OFFSET. */
11797 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11798 bool maybe_eh_return)
11800 struct machine_function *m = cfun->machine;
11801 unsigned int regno;
/* Walk the saved general-purpose registers, loading each from its
   frame slot; CFA_OFFSET steps down one word per register restored.  */
11803 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11804 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11806 rtx reg = gen_rtx_REG (word_mode, regno);
11809 mem = choose_baseaddr (cfa_offset);
11810 mem = gen_frame_mem (word_mode, mem);
11811 insn = emit_move_insn (reg, mem);
11813 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11815 /* Previously we'd represented the CFA as an expression
11816 like *(%ebp - 8). We've just popped that value from
11817 the stack, which means we need to reset the CFA to
11818 the drap register. This will remain until we restore
11819 the stack pointer. */
11820 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11821 RTX_FRAME_RELATED_P (insn) = 1;
11823 /* This means that the DRAP register is valid for addressing. */
11824 m->fs.drap_valid = true;
11827 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11829 cfa_offset -= UNITS_PER_WORD;
/* NOTE(review): sampled extract -- return-type line, braces, the rtx mem
   declaration and the per-iteration cfa_offset decrement are missing
   from view (embedded numbering jumps past 11853).  */
11833 /* Emit code to restore saved registers using MOV insns.
11834 First register is restored from CFA - CFA_OFFSET. */
11836 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11837 bool maybe_eh_return)
11839 unsigned int regno;
/* Restore each saved SSE register as a 128-bit aligned V4SF load.  */
11841 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11842 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11844 rtx reg = gen_rtx_REG (V4SFmode, regno);
11847 mem = choose_baseaddr (cfa_offset);
11848 mem = gen_rtx_MEM (V4SFmode, mem);
/* The SSE save area is 16-byte aligned, so the load can be aligned.  */
11849 set_mem_align (mem, 128);
11850 emit_move_insn (reg, mem);
11852 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11858 /* Restore function stack, frame, and registers.

   STYLE encodes the kind of epilogue being emitted: per GCC convention,
   0/1 distinguish normal vs. sibcall epilogues and 2 is the eh_return
   path (see the `style != 2' / `style == 2' tests below).
   NOTE(review): SOURCE is elided — many original lines (braces, some
   declarations and conditions) are missing; code kept byte-identical.  */
11861 ix86_expand_epilogue (int style)
11863 struct machine_function *m = cfun->machine;
/* Save the frame-state tracking so it can be restored at the end;
   the function may expand several epilogues (one per exit edge).  */
11864 struct machine_frame_state frame_state_save = m->fs;
11865 struct ix86_frame frame;
11866 bool restore_regs_via_mov;
11869 ix86_finalize_stack_realign_flags ();
11870 ix86_compute_frame_layout (&frame);
/* SP is only a usable base if no frame pointer is needed, or if SP is
   known unchanged and the frame was not realigned off of it.  */
11872 m->fs.sp_valid = (!frame_pointer_needed
11873 || (crtl->sp_is_unchanging
11874 && !stack_realign_fp));
11875 gcc_assert (!m->fs.sp_valid
11876 || m->fs.sp_offset == frame.stack_pointer_offset);
11878 /* The FP must be valid if the frame pointer is present. */
11879 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11880 gcc_assert (!m->fs.fp_valid
11881 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11883 /* We must have *some* valid pointer to the stack frame. */
11884 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11886 /* The DRAP is never valid at this point. */
11887 gcc_assert (!m->fs.drap_valid);
11889 /* See the comment about red zone and frame
11890 pointer usage in ix86_expand_prologue. */
11891 if (frame_pointer_needed && frame.red_zone_size)
11892 emit_insn (gen_memory_blockage ());
11894 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11895 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11897 /* Determine the CFA offset of the end of the red-zone. */
11898 m->fs.red_zone_offset = 0;
11899 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11901 /* The red-zone begins below the return address. */
11902 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11904 /* When the register save area is in the aligned portion of
11905 the stack, determine the maximum runtime displacement that
11906 matches up with the aligned frame. */
11907 if (stack_realign_drap)
11908 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11912 /* Special care must be taken for the normal return case of a function
11913 using eh_return: the eax and edx registers are marked as saved, but
11914 not restored along this path. Adjust the save location to match. */
11915 if (crtl->calls_eh_return && style != 2)
11916 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
/* Decide between restoring registers with MOV loads versus POPs.  */
11918 /* EH_RETURN requires the use of moves to function properly. */
11919 if (crtl->calls_eh_return)
11920 restore_regs_via_mov = true;
11921 /* SEH requires the use of pops to identify the epilogue. */
11922 else if (TARGET_SEH)
11923 restore_regs_via_mov = false;
11924 /* If we're only restoring one register and sp is not valid then
11925 using a move instruction to restore the register since it's
11926 less work than reloading sp and popping the register. */
11927 else if (!m->fs.sp_valid && frame.nregs <= 1)
11928 restore_regs_via_mov = true;
11929 else if (TARGET_EPILOGUE_USING_MOVE
11930 && cfun->machine->use_fast_prologue_epilogue
11931 && (frame.nregs > 1
11932 || m->fs.sp_offset != frame.reg_save_offset))
11933 restore_regs_via_mov = true;
11934 else if (frame_pointer_needed
11936 && m->fs.sp_offset != frame.reg_save_offset)
11937 restore_regs_via_mov = true;
11938 else if (frame_pointer_needed
11939 && TARGET_USE_LEAVE
11940 && cfun->machine->use_fast_prologue_epilogue
11941 && frame.nregs == 1)
11942 restore_regs_via_mov = true;
/* Default: restore via pops.  (NOTE(review): the `else' line is elided.) */
11944 restore_regs_via_mov = false;
11946 if (restore_regs_via_mov || frame.nsseregs)
11948 /* Ensure that the entire register save area is addressable via
11949 the stack pointer, if we will restore via sp. */
11951 && m->fs.sp_offset > 0x7fffffff
11952 && !(m->fs.fp_valid || m->fs.drap_valid)
11953 && (frame.nsseregs + frame.nregs) != 0)
11955 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11956 GEN_INT (m->fs.sp_offset
11957 - frame.sse_reg_save_offset),
11959 m->fs.cfa_reg == stack_pointer_rtx);
11963 /* If there are any SSE registers to restore, then we have to do it
11964 via moves, since there's obviously no pop for SSE regs. */
11965 if (frame.nsseregs)
11966 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11969 if (restore_regs_via_mov)
11974 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11976 /* eh_return epilogues need %ecx added to the stack pointer. */
11979 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11981 /* Stack align doesn't work with eh_return. */
11982 gcc_assert (!stack_realign_drap);
11983 /* Neither does regparm nested functions. */
11984 gcc_assert (!ix86_static_chain_on_stack);
11986 if (frame_pointer_needed)
/* SA := FP + fp_offset - UNITS_PER_WORD, the address of the saved
   frame pointer slot, then reload the hard frame pointer from it.  */
11988 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11989 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11990 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11992 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11993 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11995 /* Note that we use SA as a temporary CFA, as the return
11996 address is at the proper place relative to it. We
11997 pretend this happens at the FP restore insn because
11998 prior to this insn the FP would be stored at the wrong
11999 offset relative to SA, and after this insn we have no
12000 other reasonable register to use for the CFA. We don't
12001 bother resetting the CFA to the SP for the duration of
12002 the return insn. */
12003 add_reg_note (insn, REG_CFA_DEF_CFA,
12004 plus_constant (Pmode, sa, UNITS_PER_WORD))12005 ix86_add_queued_cfa_restore_notes (insn);
12006 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
12007 RTX_FRAME_RELATED_P (insn) = 1;
12009 m->fs.cfa_reg = sa;
12010 m->fs.cfa_offset = UNITS_PER_WORD;
12011 m->fs.fp_valid = false;
12013 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
12014 const0_rtx, style, false);
/* No frame pointer: SP := SP + SA + sp_offset - UNITS_PER_WORD.  */
12018 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
12019 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
12020 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
12021 ix86_add_queued_cfa_restore_notes (insn);
12023 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
12024 if (m->fs.cfa_offset != UNITS_PER_WORD)
12026 m->fs.cfa_offset = UNITS_PER_WORD;
12027 add_reg_note (insn, REG_CFA_DEF_CFA,
12028 plus_constant (Pmode, stack_pointer_rtx,
12030 RTX_FRAME_RELATED_P (insn) = 1;
12033 m->fs.sp_offset = UNITS_PER_WORD;
12034 m->fs.sp_valid = true;
12039 /* SEH requires that the function end with (1) a stack adjustment
12040 if necessary, (2) a sequence of pops, and (3) a return or
12041 jump instruction. Prevent insns from the function body from
12042 being scheduled into this sequence. */
12045 /* Prevent a catch region from being adjacent to the standard
12046 epilogue sequence. Unfortuantely crtl->uses_eh_lsda nor
12047 several other flags that would be interesting to test are
12049 if (flag_non_call_exceptions)
12050 emit_insn (gen_nops (const1_rtx));
12052 emit_insn (gen_blockage ());
12055 /* First step is to deallocate the stack frame so that we can
12056 pop the registers. Also do it on SEH target for very large
12057 frame as the emitted instructions aren't allowed by the ABI in
12059 if (!m->fs.sp_valid
12061 && (m->fs.sp_offset - frame.reg_save_offset
12062 >= SEH_MAX_FRAME_SIZE)))
12064 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
12065 GEN_INT (m->fs.fp_offset
12066 - frame.reg_save_offset),
12069 else if (m->fs.sp_offset != frame.reg_save_offset)
12071 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12072 GEN_INT (m->fs.sp_offset
12073 - frame.reg_save_offset),
12075 m->fs.cfa_reg == stack_pointer_rtx);
12078 ix86_emit_restore_regs_using_pop ();
12081 /* If we used a stack pointer and haven't already got rid of it,
12083 if (m->fs.fp_valid)
12085 /* If the stack pointer is valid and pointing at the frame
12086 pointer store address, then we only need a pop. */
12087 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12088 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12089 /* Leave results in shorter dependency chains on CPUs that are
12090 able to grok it fast. */
12091 else if (TARGET_USE_LEAVE
12092 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12093 || !cfun->machine->use_fast_prologue_epilogue)
12094 ix86_emit_leave ();
12097 pro_epilogue_adjust_stack (stack_pointer_rtx,
12098 hard_frame_pointer_rtx,
12099 const0_rtx, style, !using_drap);
12100 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
/* DRAP (dynamic realign argument pointer) teardown.  */
12106 int param_ptr_offset = UNITS_PER_WORD;
12109 gcc_assert (stack_realign_drap);
12111 if (ix86_static_chain_on_stack)
12112 param_ptr_offset += UNITS_PER_WORD;
12113 if (!call_used_regs[REGNO (crtl->drap_reg)])
12114 param_ptr_offset += UNITS_PER_WORD;
12116 insn = emit_insn (gen_rtx_SET
12117 (VOIDmode, stack_pointer_rtx,
12118 gen_rtx_PLUS (Pmode,
12120 GEN_INT (-param_ptr_offset))));
12121 m->fs.cfa_reg = stack_pointer_rtx;
12122 m->fs.cfa_offset = param_ptr_offset;
12123 m->fs.sp_offset = param_ptr_offset;
12124 m->fs.realigned = false;
12126 add_reg_note (insn, REG_CFA_DEF_CFA,
12127 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12128 GEN_INT (param_ptr_offset)));
12129 RTX_FRAME_RELATED_P (insn) = 1;
12131 if (!call_used_regs[REGNO (crtl->drap_reg)])
12132 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12135 /* At this point the stack pointer must be valid, and we must have
12136 restored all of the registers. We may not have deallocated the
12137 entire stack frame. We've delayed this until now because it may
12138 be possible to merge the local stack deallocation with the
12139 deallocation forced by ix86_static_chain_on_stack. */
12140 gcc_assert (m->fs.sp_valid);
12141 gcc_assert (!m->fs.fp_valid);
12142 gcc_assert (!m->fs.realigned);
12143 if (m->fs.sp_offset != UNITS_PER_WORD)
12145 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12146 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12150 ix86_add_queued_cfa_restore_notes (get_last_insn ());
12152 /* Sibcall epilogues don't want a return instruction. */
12155 m->fs = frame_state_save;
/* From here on we emit the actual return sequence.  */
12159 if (crtl->args.pops_args && crtl->args.size)
12161 rtx popc = GEN_INT (crtl->args.pops_args);
12163 /* i386 can only pop 64K bytes. If asked to pop more, pop return
12164 address, do explicit add, and jump indirectly to the caller. */
12166 if (crtl->args.pops_args >= 65536)
12168 rtx ecx = gen_rtx_REG (SImode, CX_REG);
12171 /* There is no "pascal" calling convention in any 64bit ABI. */
12172 gcc_assert (!TARGET_64BIT);
/* Pop the return address into %ecx, adjust SP past the arguments,
   then jump through %ecx.  */
12174 insn = emit_insn (gen_pop (ecx));
12175 m->fs.cfa_offset -= UNITS_PER_WORD;
12176 m->fs.sp_offset -= UNITS_PER_WORD;
12178 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12179 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
12180 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12181 add_reg_note (insn, REG_CFA_REGISTER,
12182 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
12183 RTX_FRAME_RELATED_P (insn) = 1;
12185 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12187 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12190 emit_jump_insn (gen_simple_return_pop_internal (popc));
12193 emit_jump_insn (gen_simple_return_internal ());
12195 /* Restore the state back to the state from the prologue,
12196 so that it's correct for the next epilogue. */
12197 m->fs = frame_state_save;
12200 /* Reset from the function's potential modifications.

   Target hook TARGET_ASM_FUNCTION_EPILOGUE: restores the PIC register
   number and, on Mach-O, emits a trailing nop when the function would
   otherwise end in a label.  NOTE(review): SOURCE is elided here
   (braces and some conditions missing); code kept byte-identical.  */
12203 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
/* Undo any renumbering of the PIC register done during compilation.  */
12205 if (pic_offset_table_rtx
12206 && !ix86_use_pseudo_pic_reg ())
12207 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12209 /* Mach-O doesn't support labels at the end of objects, so if
12210 it looks like we might want one, insert a NOP. */
12212 rtx_insn *insn = get_last_insn ();
12213 rtx_insn *deleted_debug_label = NULL;
/* Scan backwards over trailing notes looking for a deleted label.  */
12216 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12218 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
12219 notes only, instead set their CODE_LABEL_NUMBER to -1,
12220 otherwise there would be code generation differences
12221 in between -g and -g0. */
12222 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12223 deleted_debug_label = insn;
12224 insn = PREV_INSN (insn);
12229 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12230 fputs ("\tnop\n", file);
12231 else if (deleted_debug_label)
/* Neutralize the debug labels so -g and -g0 emit identical code.  */
12232 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12233 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12234 CODE_LABEL_NUMBER (insn) = -1;
12240 /* Return a scratch register to use in the split stack prologue. The
12241 split stack prologue is used for -fsplit-stack. It is the first
12242 instructions in the function, even before the regular prologue.
12243 The scratch register can be any caller-saved register which is not
12244 used for parameters or for the static chain.

   Returns INVALID_REGNUM (with a `sorry' diagnostic) when no such
   register exists.  NOTE(review): SOURCE is elided — the successful
   return statements and the 64-bit fast path are not visible; code
   kept byte-identical.  */
12246 static unsigned int
12247 split_stack_prologue_scratch_regno (void)
12253 bool is_fastcall, is_thiscall;
/* fastcall/thiscall consume extra registers for parameter passing,
   which constrains which caller-saved register remains free.  */
12256 is_fastcall = (lookup_attribute ("fastcall",
12257 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12259 is_thiscall = (lookup_attribute ("thiscall",
12260 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12262 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
12266 if (DECL_STATIC_CHAIN (cfun->decl))
12268 sorry ("-fsplit-stack does not support fastcall with "
12269 "nested function");
12270 return INVALID_REGNUM;
12274 else if (is_thiscall)
12276 if (!DECL_STATIC_CHAIN (cfun->decl))
12280 else if (regparm < 3)
12282 if (!DECL_STATIC_CHAIN (cfun->decl))
12288 sorry ("-fsplit-stack does not support 2 register "
12289 "parameters for a nested function");
12290 return INVALID_REGNUM;
12297 /* FIXME: We could make this work by pushing a register
12298 around the addition and comparison. */
12299 sorry ("-fsplit-stack does not support 3 register parameters");
12300 return INVALID_REGNUM;
12305 /* A SYMBOL_REF for the function which allocates new stackspace for
   -fsplit-stack (__morestack); lazily created, GC-rooted via GTY.  */
12308 static GTY(()) rtx split_stack_fn;
12310 /* A SYMBOL_REF for the more stack function when using the large
   code model (__morestack_large_model); lazily created, GC-rooted.  */
12313 static GTY(()) rtx split_stack_fn_large;
12315 /* Handle -fsplit-stack. These are the first instructions in the
12316 function, even before the regular prologue.

   Compares the needed stack against the limit in the TCB; when short,
   calls __morestack (or __morestack_large_model) to allocate a new
   stack segment.  NOTE(review): SOURCE is elided — braces and several
   lines are missing; code kept byte-identical.  */
12319 ix86_expand_split_stack_prologue (void)
12321 struct ix86_frame frame;
12322 HOST_WIDE_INT allocate;
12323 unsigned HOST_WIDE_INT args_size;
12324 rtx_code_label *label;
12325 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12326 rtx scratch_reg = NULL_RTX;
12327 rtx_code_label *varargs_label = NULL;
12330 gcc_assert (flag_split_stack && reload_completed);
12332 ix86_finalize_stack_realign_flags ();
12333 ix86_compute_frame_layout (&frame);
12334 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12336 /* This is the label we will branch to if we have enough stack
12337 space. We expect the basic block reordering pass to reverse this
12338 branch if optimizing, so that we branch in the unlikely case. */
12339 label = gen_label_rtx ();
12341 /* We need to compare the stack pointer minus the frame size with
12342 the stack boundary in the TCB. The stack boundary always gives
12343 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12344 can compare directly. Otherwise we need to do an addition. */
/* The TCB stack-limit slot is addressed via an UNSPEC_STACK_CHECK
   memory reference (segment-relative access).  */
12346 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12347 UNSPEC_STACK_CHECK);
12348 limit = gen_rtx_CONST (Pmode, limit);
12349 limit = gen_rtx_MEM (Pmode, limit);
12350 if (allocate < SPLIT_STACK_AVAILABLE)
12351 current = stack_pointer_rtx;
12354 unsigned int scratch_regno;
12357 /* We need a scratch register to hold the stack pointer minus
12358 the required frame size. Since this is the very start of the
12359 function, the scratch register can be any caller-saved
12360 register which is not used for parameters. */
12361 offset = GEN_INT (- allocate);
12362 scratch_regno = split_stack_prologue_scratch_regno ();
12363 if (scratch_regno == INVALID_REGNUM)
12365 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12366 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12368 /* We don't use ix86_gen_add3 in this case because it will
12369 want to split to lea, but when not optimizing the insn
12370 will not be split after this point. */
12371 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12372 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
/* Offset does not fit in an immediate: materialize it first.  */
12377 emit_move_insn (scratch_reg, offset);
12378 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12379 stack_pointer_rtx));
12381 current = scratch_reg;
12384 ix86_expand_branch (GEU, current, limit, label);
12385 jump_insn = get_last_insn ();
12386 JUMP_LABEL (jump_insn) = label;
12388 /* Mark the jump as very likely to be taken. */
12389 add_int_reg_note (jump_insn, REG_BR_PROB,
12390 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
/* Lazily create the __morestack symbol; marked local so the call
   does not go through the PLT.  */
12392 if (split_stack_fn == NULL_RTX)
12394 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12395 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12397 fn = split_stack_fn;
12399 /* Get more stack space. We pass in the desired stack space and the
12400 size of the arguments to copy to the new stack. In 32-bit mode
12401 we push the parameters; __morestack will return on a new stack
12402 anyhow. In 64-bit mode we pass the parameters in r10 and
12404 allocate_rtx = GEN_INT (allocate);
12405 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12406 call_fusage = NULL_RTX;
12411 reg10 = gen_rtx_REG (Pmode, R10_REG);
12412 reg11 = gen_rtx_REG (Pmode, R11_REG);
12414 /* If this function uses a static chain, it will be in %r10.
12415 Preserve it across the call to __morestack. */
12416 if (DECL_STATIC_CHAIN (cfun->decl))
12420 rax = gen_rtx_REG (word_mode, AX_REG);
12421 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12422 use_reg (&call_fusage, rax);
12425 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12428 HOST_WIDE_INT argval;
12430 gcc_assert (Pmode == DImode);
12431 /* When using the large model we need to load the address
12432 into a register, and we've run out of registers. So we
12433 switch to a different calling convention, and we call a
12434 different function: __morestack_large. We pass the
12435 argument size in the upper 32 bits of r10 and pass the
12436 frame size in the lower 32 bits. */
12437 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12438 gcc_assert ((args_size & 0xffffffff) == args_size);
12440 if (split_stack_fn_large == NULL_RTX)
12442 split_stack_fn_large =
12443 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12444 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12446 if (ix86_cmodel == CM_LARGE_PIC)
12448 rtx_code_label *label;
/* Large PIC model: compute the GOT base by hand (rip-relative
   label + GOT offset) and load the function address from the GOT.  */
12451 label = gen_label_rtx ();
12452 emit_label (label);
12453 LABEL_PRESERVE_P (label) = 1;
12454 emit_insn (gen_set_rip_rex64 (reg10, label));
12455 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12456 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12457 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12459 x = gen_rtx_CONST (Pmode, x);
12460 emit_move_insn (reg11, x);
12461 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12462 x = gen_const_mem (Pmode, x);
12463 emit_move_insn (reg11, x);
12466 emit_move_insn (reg11, split_stack_fn_large);
/* Pack args_size into the high 32 bits and allocate into the low
   32 bits of r10.  The double shift avoids UB when the shift count
   would equal the width of HOST_WIDE_INT.  */
12470 argval = ((args_size << 16) << 16) + allocate;
12471 emit_move_insn (reg10, GEN_INT (argval));
/* Normal 64-bit convention: frame size in r10, args size in r11.  */
12475 emit_move_insn (reg10, allocate_rtx);
12476 emit_move_insn (reg11, GEN_INT (args_size));
12477 use_reg (&call_fusage, reg11);
12480 use_reg (&call_fusage, reg10);
/* 32-bit: push the two arguments on the stack.  */
12484 emit_insn (gen_push (GEN_INT (args_size)));
12485 emit_insn (gen_push (allocate_rtx));
12487 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12488 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12490 add_function_usage_to (call_insn, call_fusage);
12492 /* In order to make call/return prediction work right, we now need
12493 to execute a return instruction. See
12494 libgcc/config/i386/morestack.S for the details on how this works.
12496 For flow purposes gcc must not see this as a return
12497 instruction--we need control flow to continue at the subsequent
12498 label. Therefore, we use an unspec. */
12499 gcc_assert (crtl->args.pops_args < 65536);
12500 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12502 /* If we are in 64-bit mode and this function uses a static chain,
12503 we saved %r10 in %rax before calling _morestack. */
12504 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12505 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12506 gen_rtx_REG (word_mode, AX_REG));
12508 /* If this function calls va_start, we need to store a pointer to
12509 the arguments on the old stack, because they may not have been
12510 all copied to the new stack. At this point the old stack can be
12511 found at the frame pointer value used by __morestack, because
12512 __morestack has set that up before calling back to us. Here we
12513 store that pointer in a scratch register, and in
12514 ix86_expand_prologue we store the scratch register in a stack
12516 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12518 unsigned int scratch_regno;
12522 scratch_regno = split_stack_prologue_scratch_regno ();
12523 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12524 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12528 return address within this function
12529 return address of caller of this function
12531 So we add three words to get to the stack arguments.
12535 return address within this function
12536 first argument to __morestack
12537 second argument to __morestack
12538 return address of caller of this function
12540 So we add five words to get to the stack arguments.
12542 words = TARGET_64BIT ? 3 : 5;
12543 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12544 gen_rtx_PLUS (Pmode, frame_reg,
12545 GEN_INT (words * UNITS_PER_WORD))));
12547 varargs_label = gen_label_rtx ();
12548 emit_jump_insn (gen_jump (varargs_label));
12549 JUMP_LABEL (get_last_insn ()) = varargs_label;
/* Fall-through path: enough stack was available.  */
12554 emit_label (label);
12555 LABEL_NUSES (label) = 1;
12557 /* If this function calls va_start, we now have to set the scratch
12558 register for the case where we do not call __morestack. In this
12559 case we need to set it based on the stack pointer. */
12560 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12562 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12563 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12564 GEN_INT (UNITS_PER_WORD))));
12566 emit_label (varargs_label);
12567 LABEL_NUSES (varargs_label) = 1;
12571 /* We may have to tell the dataflow pass that the split stack prologue
12572 is initializing a scratch register.

   Target hook: marks the split-stack scratch register as live on
   entry so dataflow does not treat its prologue use as dead.  */
12575 ix86_live_on_entry (bitmap regs)
12577 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12579 gcc_assert (flag_split_stack);
12580 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12584 /* Extract the parts of an RTL expression that is a valid memory address
12585 for an instruction. Return 0 if the structure of the address is
12586 grossly off. Return -1 if the address contains ASHIFT, so it is not
12587 strictly valid, but still used for computing length of lea instruction.

   On success OUT receives base/index/disp/scale/seg.  NOTE(review):
   SOURCE is elided — several case labels, braces, and early returns
   are not visible; code kept byte-identical.  */
12590 ix86_decompose_address (rtx addr, struct ix86_address *out)
12592 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12593 rtx base_reg, index_reg;
12594 HOST_WIDE_INT scale = 1;
12595 rtx scale_rtx = NULL_RTX;
12598 enum ix86_address_seg seg = SEG_DEFAULT;
12600 /* Allow zero-extended SImode addresses,
12601 they will be emitted with addr32 prefix. */
12602 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12604 if (GET_CODE (addr) == ZERO_EXTEND
12605 && GET_MODE (XEXP (addr, 0)) == SImode)
12607 addr = XEXP (addr, 0);
12608 if (CONST_INT_P (addr))
12611 else if (GET_CODE (addr) == AND
12612 && const_32bit_mask (XEXP (addr, 1), DImode))
/* (and X 0xffffffff) is equivalent to a zero-extend; strip the mask
   by taking the low SImode subreg.  */
12614 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12615 if (addr == NULL_RTX)
12618 if (CONST_INT_P (addr))
12623 /* Allow SImode subregs of DImode addresses,
12624 they will be emitted with addr32 prefix. */
12625 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12627 if (GET_CODE (addr) == SUBREG
12628 && GET_MODE (SUBREG_REG (addr)) == DImode)
12630 addr = SUBREG_REG (addr);
12631 if (CONST_INT_P (addr))
/* Dispatch on the top-level form of the address.  */
12638 else if (GET_CODE (addr) == SUBREG)
12640 if (REG_P (SUBREG_REG (addr)))
12645 else if (GET_CODE (addr) == PLUS)
/* Flatten a (possibly nested) PLUS chain into up to 4 addends, then
   classify each addend as base, index*scale, displacement or segment.  */
12647 rtx addends[4], op;
12655 addends[n++] = XEXP (op, 1);
12658 while (GET_CODE (op) == PLUS);
12663 for (i = n; i >= 0; --i)
12666 switch (GET_CODE (op))
12671 index = XEXP (op, 0);
12672 scale_rtx = XEXP (op, 1);
/* ASHIFT addend: index shifted left by a constant in {0,1,2,3}.  */
12678 index = XEXP (op, 0);
12679 tmp = XEXP (op, 1);
12680 if (!CONST_INT_P (tmp))
12682 scale = INTVAL (tmp);
12683 if ((unsigned HOST_WIDE_INT) scale > 3)
12685 scale = 1 << scale;
12690 if (GET_CODE (op) != UNSPEC)
/* UNSPEC_TP marks a TLS thread-pointer reference; encode it as a
   segment override when direct segment references are allowed.  */
12695 if (XINT (op, 1) == UNSPEC_TP
12696 && TARGET_TLS_DIRECT_SEG_REFS
12697 && seg == SEG_DEFAULT)
12698 seg = DEFAULT_TLS_SEG_REG;
12704 if (!REG_P (SUBREG_REG (op)))
12731 else if (GET_CODE (addr) == MULT)
12733 index = XEXP (addr, 0); /* index*scale */
12734 scale_rtx = XEXP (addr, 1);
12736 else if (GET_CODE (addr) == ASHIFT)
12738 /* We're called for lea too, which implements ashift on occasion. */
12739 index = XEXP (addr, 0);
12740 tmp = XEXP (addr, 1);
12741 if (!CONST_INT_P (tmp))
12743 scale = INTVAL (tmp);
12744 if ((unsigned HOST_WIDE_INT) scale > 3)
12746 scale = 1 << scale;
12750 disp = addr; /* displacement */
12756 else if (GET_CODE (index) == SUBREG
12757 && REG_P (SUBREG_REG (index)))
12763 /* Extract the integral value of scale. */
12766 if (!CONST_INT_P (scale_rtx))
12768 scale = INTVAL (scale_rtx);
/* Strip SUBREGs so the special-case tests below see the hard regs.  */
12771 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12772 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12774 /* Avoid useless 0 displacement. */
12775 if (disp == const0_rtx && (base || index))
12778 /* Allow arg pointer and stack pointer as index if there is not scaling. */
12779 if (base_reg && index_reg && scale == 1
12780 && (index_reg == arg_pointer_rtx
12781 || index_reg == frame_pointer_rtx
12782 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
/* SP cannot be an index in x86 encoding, so swap base and index.  */
12784 std::swap (base, index);
12785 std::swap (base_reg, index_reg);
12788 /* Special case: %ebp cannot be encoded as a base without a displacement.
12792 && (base_reg == hard_frame_pointer_rtx
12793 || base_reg == frame_pointer_rtx
12794 || base_reg == arg_pointer_rtx
12795 || (REG_P (base_reg)
12796 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12797 || REGNO (base_reg) == R13_REG))))
12800 /* Special case: on K6, [%esi] makes the instruction vector decoded.
12801 Avoid this by transforming to [%esi+0].
12802 Reload calls address legitimization without cfun defined, so we need
12803 to test cfun for being non-NULL. */
12804 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12805 && base_reg && !index_reg && !disp
12806 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12809 /* Special case: encode reg+reg instead of reg*2. */
12810 if (!base && index && scale == 2)
12811 base = index, base_reg = index_reg, scale = 1;
12813 /* Special case: scaling cannot be encoded without base or displacement. */
12814 if (!base && !disp && index && scale != 1)
12818 out->index = index;
12820 out->scale = scale;
12826 /* Return cost of the memory address x.
12827 For i386, it is better to use a complex address than let gcc copy
12828 the address into a reg and make a new pseudo. But not if the address
12829 requires to two regs - that would mean more pseudos with longer
   lifetimes.

   Target hook TARGET_ADDRESS_COST.  NOTE(review): SOURCE is elided —
   the actual `cost' variable and return statements are not visible;
   code kept byte-identical.  */
12832 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
12834 struct ix86_address parts;
12836 int ok = ix86_decompose_address (x, &parts);
/* Look through SUBREGs so register-number checks below work.  */
12840 if (parts.base && GET_CODE (parts.base) == SUBREG)
12841 parts.base = SUBREG_REG (parts.base);
12842 if (parts.index && GET_CODE (parts.index) == SUBREG)
12843 parts.index = SUBREG_REG (parts.index);
12845 /* Attempt to minimize number of registers in the address. */
12847 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12849 && (!REG_P (parts.index)
12850 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12853 /* When address base or index is "pic_offset_table_rtx" we don't increase
12854 address cost. When a memopt with "pic_offset_table_rtx" is not invariant
12855 itself it most likely means that base or index is not invariant.
12856 Therefore only "pic_offset_table_rtx" could be hoisted out, which is not
12857 profitable for x86. */
12859 && (current_pass->type == GIMPLE_PASS
12860 || (!pic_offset_table_rtx
12861 || REGNO (pic_offset_table_rtx) != REGNO(parts.base)))
12862 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12864 && (current_pass->type == GIMPLE_PASS
12865 || (!pic_offset_table_rtx
12866 || REGNO (pic_offset_table_rtx) != REGNO(parts.index)))
12867 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12868 && parts.base != parts.index)
12871 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
12872 since it's predecode logic can't detect the length of instructions
12873 and it degenerates to vector decoded. Increase cost of such
12874 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
12875 to split such addresses or even refuse such addresses at all.
12877 Following addressing modes are affected:
12882 The first and last case may be avoidable by explicitly coding the zero in
12883 memory address, but I don't have AMD-K6 machine handy to check this
   theory.  */
12887 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12888 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12889 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12895 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12896 this is used for to form addresses to local data when -fPIC is in
   effect.  Returns true iff DISP is the Mach-O picbase-offset unspec.  */
12900 darwin_local_data_pic (rtx disp)
12902 return (GET_CODE (disp) == UNSPEC
12903 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12906 /* Determine if a given RTX is a valid constant. We already know this
12907 satisfies CONSTANT_P.

   Target hook TARGET_LEGITIMATE_CONSTANT_P.  NOTE(review): SOURCE is
   elided — some case labels and returns are missing; code kept
   byte-identical.  */
12910 ix86_legitimate_constant_p (machine_mode, rtx x)
12912 /* Pointer bounds constants are not valid. */
12913 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
12916 switch (GET_CODE (x))
/* CONST: strip an outer (plus X const_int) wrapper first.  */
12921 if (GET_CODE (x) == PLUS)
12923 if (!CONST_INT_P (XEXP (x, 1)))
12928 if (TARGET_MACHO && darwin_local_data_pic (x))
12931 /* Only some unspecs are valid as "constants". */
12932 if (GET_CODE (x) == UNSPEC)
12933 switch (XINT (x, 1))
12936 case UNSPEC_GOTOFF:
12937 case UNSPEC_PLTOFF:
12938 return TARGET_64BIT;
/* NTPOFF/DTPOFF are valid only when the wrapped symbol has the
   matching TLS model (local-exec / local-dynamic).  */
12940 case UNSPEC_NTPOFF:
12941 x = XVECEXP (x, 0, 0);
12942 return (GET_CODE (x) == SYMBOL_REF
12943 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12944 case UNSPEC_DTPOFF:
12945 x = XVECEXP (x, 0, 0);
12946 return (GET_CODE (x) == SYMBOL_REF
12947 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12952 /* We must have drilled down to a symbol. */
12953 if (GET_CODE (x) == LABEL_REF)
12955 if (GET_CODE (x) != SYMBOL_REF)
12960 /* TLS symbols are never valid. */
12961 if (SYMBOL_REF_TLS_MODEL (x))
12964 /* DLLIMPORT symbols are never valid. */
12965 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12966 && SYMBOL_REF_DLLIMPORT_P (x))
12970 /* mdynamic-no-pic */
12971 if (MACHO_DYNAMIC_NO_PIC_P)
12972 return machopic_symbol_defined_p (x);
/* TImode constants other than zero: presumably only certain SSE
   constants are representable — TODO confirm against full source.  */
12977 if (GET_MODE (x) == TImode
12978 && x != CONST0_RTX (TImode)
12984 if (!standard_sse_constant_p (x))
12991 /* Otherwise we handle everything else in the move patterns. */
12995 /* Determine if it's legal to put X into the constant pool. This
12996 is not possible for the address of thread-local symbols, which
12997 is checked above.

   Target hook TARGET_CANNOT_FORCE_CONST_MEM: returns true when X must
   NOT be forced into memory.  */
13000 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
13002 /* We can always put integral constants and vectors in memory. */
13003 switch (GET_CODE (x))
/* Anything else is forceable only if it is a legitimate constant.  */
13013 return !ix86_legitimate_constant_p (mode, x);
13016 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
   on targets with dllimport decl attributes (PE/COFF).  Returns false
   for non-SYMBOL_REF rtxen or when the attribute machinery is off.  */
13020 is_imported_p (rtx x)
13022 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
13023 || GET_CODE (x) != SYMBOL_REF)
13026 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
13030 /* Nonzero if the constant value X is a legitimate general operand
13031 when generating PIC code. It is given that flag_pic is on and
13032 that X satisfies CONSTANT_P or is a CONST_DOUBLE.

   NOTE(review): SOURCE is elided — some case labels and the default
   path are missing; code kept byte-identical.  */
13035 legitimate_pic_operand_p (rtx x)
13039 switch (GET_CODE (x))
/* CONST: strip (plus X const_int) and inspect the inner expression.  */
13042 inner = XEXP (x, 0);
13043 if (GET_CODE (inner) == PLUS
13044 && CONST_INT_P (XEXP (inner, 1)))
13045 inner = XEXP (inner, 0);
13047 /* Only some unspecs are valid as "constants". */
13048 if (GET_CODE (inner) == UNSPEC)
13049 switch (XINT (inner, 1))
13052 case UNSPEC_GOTOFF:
13053 case UNSPEC_PLTOFF:
13054 return TARGET_64BIT;
/* Presumably the NTPOFF (local-exec TLS) case — the label line is
   elided; verify against the full source.  */
13056 x = XVECEXP (inner, 0, 0);
13057 return (GET_CODE (x) == SYMBOL_REF
13058 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13059 case UNSPEC_MACHOPIC_OFFSET:
13060 return legitimate_pic_address_disp_p (x);
13068 return legitimate_pic_address_disp_p (x);
13075 /* Determine if a given CONST RTX is a valid memory displacement
13079 legitimate_pic_address_disp_p (rtx disp)
13083 /* In 64bit mode we can allow direct addresses of symbols and labels
13084 when they are not dynamic symbols. */
13087 rtx op0 = disp, op1;
13089 switch (GET_CODE (disp))
/* CONST case: require the form (const (plus op0 const_int)) with the
   offset inside the signed 26-bit-ish +/-16MB window, matching the
   RIP-relative addressing reach assumptions used below.  */
13095 if (GET_CODE (XEXP (disp, 0)) != PLUS)
13097 op0 = XEXP (XEXP (disp, 0), 0);
13098 op1 = XEXP (XEXP (disp, 0), 1);
13099 if (!CONST_INT_P (op1)
13100 || INTVAL (op1) >= 16*1024*1024
13101 || INTVAL (op1) < -16*1024*1024)
13103 if (GET_CODE (op0) == LABEL_REF)
13105 if (GET_CODE (op0) == CONST
13106 && GET_CODE (XEXP (op0, 0)) == UNSPEC
13107 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13109 if (GET_CODE (op0) == UNSPEC
13110 && XINT (op0, 1) == UNSPEC_PCREL)
13112 if (GET_CODE (op0) != SYMBOL_REF)
13117 /* TLS references should always be enclosed in UNSPEC.
13118 The dllimported symbol needs always to be resolved. */
13119 if (SYMBOL_REF_TLS_MODEL (op0)
13120 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
13125 if (is_imported_p (op0))
/* NOTE(review): several result/branch lines are elided between the
   conditions below (gaps in the line numbers); conditions kept verbatim.  */
13128 if (SYMBOL_REF_FAR_ADDR_P (op0)
13129 || !SYMBOL_REF_LOCAL_P (op0))
13132 /* Function-symbols need to be resolved only for
13134 For the small-model we don't need to resolve anything
13136 if ((ix86_cmodel != CM_LARGE_PIC
13137 && SYMBOL_REF_FUNCTION_P (op0))
13138 || ix86_cmodel == CM_SMALL_PIC)
13140 /* Non-external symbols don't need to be resolved for
13141 large, and medium-model. */
13142 if ((ix86_cmodel == CM_LARGE_PIC
13143 || ix86_cmodel == CM_MEDIUM_PIC)
13144 && !SYMBOL_REF_EXTERNAL_P (op0))
13147 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13148 && (SYMBOL_REF_LOCAL_P (op0)
13149 || (HAVE_LD_PIE_COPYRELOC
13151 && !SYMBOL_REF_WEAK (op0)
13152 && !SYMBOL_REF_FUNCTION_P (op0)))
13153 && ix86_cmodel != CM_LARGE_PIC)
13161 if (GET_CODE (disp) != CONST)
13163 disp = XEXP (disp, 0);
/* 64-bit path: only a small set of GOT/PC-relative unspecs wrapping a
   symbol or label are acceptable as displacements.  */
13167 /* We are unsafe to allow PLUS expressions. This limit allowed distance
13168 of GOT tables. We should not need these anyway. */
13169 if (GET_CODE (disp) != UNSPEC
13170 || (XINT (disp, 1) != UNSPEC_GOTPCREL
13171 && XINT (disp, 1) != UNSPEC_GOTOFF
13172 && XINT (disp, 1) != UNSPEC_PCREL
13173 && XINT (disp, 1) != UNSPEC_PLTOFF))
13176 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13177 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit path: strip an outer "+ const_int" before classifying the
   remaining unspec.  */
13183 if (GET_CODE (disp) == PLUS)
13185 if (!CONST_INT_P (XEXP (disp, 1)))
13187 disp = XEXP (disp, 0);
13191 if (TARGET_MACHO && darwin_local_data_pic (disp))
13194 if (GET_CODE (disp) != UNSPEC)
13197 switch (XINT (disp, 1))
13202 /* We need to check for both symbols and labels because VxWorks loads
13203 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
13205 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13206 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13207 case UNSPEC_GOTOFF:
13208 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
13209 While ABI specify also 32bit relocation but we don't produce it in
13210 small PIC model at all. */
13211 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13212 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13214 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
13216 case UNSPEC_GOTTPOFF:
13217 case UNSPEC_GOTNTPOFF:
13218 case UNSPEC_INDNTPOFF:
/* Each TLS unspec is valid only when the wrapped symbol's TLS model
   matches the relocation kind (IE / LE / LD respectively).  */
13221 disp = XVECEXP (disp, 0, 0);
13222 return (GET_CODE (disp) == SYMBOL_REF
13223 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13224 case UNSPEC_NTPOFF:
13225 disp = XVECEXP (disp, 0, 0);
13226 return (GET_CODE (disp) == SYMBOL_REF
13227 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13228 case UNSPEC_DTPOFF:
13229 disp = XVECEXP (disp, 0, 0);
13230 return (GET_CODE (disp) == SYMBOL_REF
13231 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13237 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
13238 replace the input X, or the original X if no replacement is called for.
13239 The output parameter *WIN is 1 if the calling macro should goto WIN,
13240 0 if it should not. */
13243 ix86_legitimize_reload_address (rtx x, machine_mode, int opnum, int type,
13246 /* Reload can generate:
13248 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
13252 This RTX is rejected from ix86_legitimate_address_p due to
13253 non-strictness of base register 97. Following this rejection,
13254 reload pushes all three components into separate registers,
13255 creating invalid memory address RTX.
13257 Following code reloads only the invalid part of the
13258 memory address RTX. */
/* Match (plus (plus ... base-reg) index-reg) and reload only whichever
   of the two registers fails the strict base/index check.  */
13260 if (GET_CODE (x) == PLUS
13261 && REG_P (XEXP (x, 1))
13262 && GET_CODE (XEXP (x, 0)) == PLUS
13263 && REG_P (XEXP (XEXP (x, 0), 1)))
13266 bool something_reloaded = false;
13268 base = XEXP (XEXP (x, 0), 1);
13269 if (!REG_OK_FOR_BASE_STRICT_P (base))
13271 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
13272 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13273 opnum, (enum reload_type) type);
13274 something_reloaded = true;
13277 index = XEXP (x, 1);
13278 if (!REG_OK_FOR_INDEX_STRICT_P (index))
13280 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
13281 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13282 opnum, (enum reload_type) type);
13283 something_reloaded = true;
/* We only enter this path because the address was rejected, so at least
   one component must have needed a reload.  */
13286 gcc_assert (something_reloaded);
13293 /* Determine if op is suitable RTX for an address register.
13294 Return naked register if a register or a register subreg is
13295 found, otherwise return NULL_RTX. */
13298 ix86_validate_address_register (rtx op)
13300 machine_mode mode = GET_MODE (op);
13302 /* Only SImode or DImode registers can form the address. */
13303 if (mode != SImode && mode != DImode)
/* NOTE(review): the plain-REG branch is elided here (line-number gap);
   the SUBREG branch below unwraps and validates the inner register.  */
13308 else if (GET_CODE (op) == SUBREG)
13310 rtx reg = SUBREG_REG (op);
13315 mode = GET_MODE (reg);
13317 /* Don't allow SUBREGs that span more than a word. It can
13318 lead to spill failures when the register is one word out
13319 of a two word structure. */
13320 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13323 /* Allow only SUBREGs of non-eliminable hard registers. */
13324 if (register_no_elim_operand (reg, mode))
13328 /* Op is not a register. */
13332 /* Recognizes RTL expressions that are valid memory addresses for an
13333 instruction. The MODE argument is the machine mode for the MEM
13334 expression that wants to use this address.
13336 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
13337 convert common non-canonical forms to canonical form so that they will
13341 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13343 struct ix86_address parts;
13344 rtx base, index, disp;
13345 HOST_WIDE_INT scale;
13346 enum ix86_address_seg seg;
13348 if (ix86_decompose_address (addr, &parts) <= 0)
13349 /* Decomposition failed. */
13353 index = parts.index;
13355 scale = parts.scale;
13358 /* Validate base register. */
/* STRICT selects hard-register checks (after reload); non-strict also
   accepts pseudos.  */
13361 rtx reg = ix86_validate_address_register (base);
13363 if (reg == NULL_RTX)
13366 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13367 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13368 /* Base is not valid. */
13372 /* Validate index register. */
13375 rtx reg = ix86_validate_address_register (index);
13377 if (reg == NULL_RTX)
13380 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13381 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13382 /* Index is not valid. */
13386 /* Index and base should have the same mode. */
13388 && GET_MODE (base) != GET_MODE (index))
13391 /* Address override works only on the (%reg) part of %fs:(%reg). */
13392 if (seg != SEG_DEFAULT
13393 && ((base && GET_MODE (base) != word_mode)
13394 || (index && GET_MODE (index) != word_mode)))
13397 /* Validate scale factor. */
13401 /* Scale without index. */
13404 if (scale != 2 && scale != 4 && scale != 8)
13405 /* Scale is not a valid multiplier. */
13409 /* Validate displacement. */
/* A CONST-wrapped UNSPEC displacement is only valid for the specific
   GOT/TLS relocation unspecs enumerated below.  */
13412 if (GET_CODE (disp) == CONST
13413 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13414 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13415 switch (XINT (XEXP (disp, 0), 1))
13417 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
13418 used. While ABI specify also 32bit relocations, we don't produce
13419 them at all and use IP relative instead. */
13421 case UNSPEC_GOTOFF:
13422 gcc_assert (flag_pic);
13424 goto is_legitimate_pic;
13426 /* 64bit address unspec. */
13429 case UNSPEC_GOTPCREL:
13431 gcc_assert (flag_pic);
13432 goto is_legitimate_pic;
13434 case UNSPEC_GOTTPOFF:
13435 case UNSPEC_GOTNTPOFF:
13436 case UNSPEC_INDNTPOFF:
13437 case UNSPEC_NTPOFF:
13438 case UNSPEC_DTPOFF:
13441 case UNSPEC_STACK_CHECK:
13442 gcc_assert (flag_split_stack);
13446 /* Invalid address unspec. */
13450 else if (SYMBOLIC_CONST (disp)
13454 && MACHOPIC_INDIRECT
13455 && !machopic_operand_p (disp)
/* PIC displacement checking ("is_legitimate_pic" label is elided in
   this listing).  */
13461 if (TARGET_64BIT && (index || base))
13463 /* foo@dtpoff(%rX) is ok. */
13464 if (GET_CODE (disp) != CONST
13465 || GET_CODE (XEXP (disp, 0)) != PLUS
13466 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13467 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13468 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13469 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13470 /* Non-constant pic memory reference. */
13473 else if ((!TARGET_MACHO || flag_pic)
13474 && ! legitimate_pic_address_disp_p (disp))
13475 /* Displacement is an invalid pic construct. */
13478 else if (MACHO_DYNAMIC_NO_PIC_P
13479 && !ix86_legitimate_constant_p (Pmode, disp))
13480 /* displacment must be referenced via non_lazy_pointer */
13484 /* This code used to verify that a symbolic pic displacement
13485 includes the pic_offset_table_rtx register.
13487 While this is good idea, unfortunately these constructs may
13488 be created by "adds using lea" optimization for incorrect
13497 This code is nonsensical, but results in addressing
13498 GOT table with pic_offset_table_rtx base. We can't
13499 just refuse it easily, since it gets matched by
13500 "addsi3" pattern, that later gets split to lea in the
13501 case output register differs from input. While this
13502 can be handled by separate addsi pattern for this case
13503 that never results in lea, this seems to be easier and
13504 correct fix for crash to disable this test. */
13506 else if (GET_CODE (disp) != LABEL_REF
13507 && !CONST_INT_P (disp)
13508 && (GET_CODE (disp) != CONST
13509 || !ix86_legitimate_constant_p (Pmode, disp))
13510 && (GET_CODE (disp) != SYMBOL_REF
13511 || !ix86_legitimate_constant_p (Pmode, disp)))
13512 /* Displacement is not constant. */
13514 else if (TARGET_64BIT
13515 && !x86_64_immediate_operand (disp, VOIDmode))
13516 /* Displacement is out of range. */
13518 /* In x32 mode, constant addresses are sign extended to 64bit, so
13519 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13520 else if (TARGET_X32 && !(index || base)
13521 && CONST_INT_P (disp)
13522 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13526 /* Everything looks valid. */
13530 /* Determine if a given RTX is a valid constant address. */
13533 constant_address_p (rtx x)
/* A constant address is any CONSTANT_P rtx that also passes the strict
   address-legitimacy check.  */
13535 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13538 /* Return a unique alias set for the GOT. */
13540 static alias_set_type
13541 ix86_GOT_alias_set (void)
/* Lazily created once; -1 marks "not yet allocated".  */
13543 static alias_set_type set = -1;
13545 set = new_alias_set ();
13549 /* Set regs_ever_live for PIC base address register
13550 to true if required. */
13552 set_pic_reg_ever_live ()
/* Only meaningful while reload is running; otherwise dataflow maintains
   regs_ever_live itself.  */
13554 if (reload_in_progress)
13555 df_set_regs_ever_live (REGNO (pic_offset_table_rtx), true);
13558 /* Return a legitimate reference for ORIG (an address) using the
13559 register REG. If REG is 0, a new pseudo is generated.
13561 There are two types of references that must be handled:
13563 1. Global data references must load the address from the GOT, via
13564 the PIC reg. An insn is emitted to do this load, and the reg is
13567 2. Static data references, constant pool addresses, and code labels
13568 compute the address as an offset from the GOT, whose base is in
13569 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13570 differentiate them from global data objects. The returned
13571 address is the PIC reg + an unspec constant.
13573 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13574 reg also appears in the address. */
13577 legitimize_pic_address (rtx orig, rtx reg)
13580 rtx new_rtx = orig;
13583 if (TARGET_MACHO && !TARGET_64BIT)
13586 reg = gen_reg_rtx (Pmode);
13587 /* Use the generic Mach-O PIC machinery. */
13588 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
/* PE-COFF dllimport symbols get their own indirection first.  */
13592 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13594 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13599 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13601 else if (TARGET_64BIT && !TARGET_PECOFF
13602 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13605 /* This symbol may be referenced via a displacement from the PIC
13606 base address (@GOTOFF). */
13608 set_pic_reg_ever_live ();
13609 if (GET_CODE (addr) == CONST)
13610 addr = XEXP (addr, 0);
13611 if (GET_CODE (addr) == PLUS)
13613 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13615 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13618 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13619 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13621 tmpreg = gen_reg_rtx (Pmode);
13624 emit_move_insn (tmpreg, new_rtx);
13628 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13629 tmpreg, 1, OPTAB_DIRECT);
13633 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13635 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13637 /* This symbol may be referenced via a displacement from the PIC
13638 base address (@GOTOFF). */
13640 set_pic_reg_ever_live ();
13641 if (GET_CODE (addr) == CONST)
13642 addr = XEXP (addr, 0);
13643 if (GET_CODE (addr) == PLUS)
13645 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13647 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13650 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13651 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13652 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13656 emit_move_insn (reg, new_rtx);
13660 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13661 /* We can't use @GOTOFF for text labels on VxWorks;
13662 see gotoff_operand. */
13663 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13665 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13669 /* For x64 PE-COFF there is no GOT table. So we use address
13671 if (TARGET_64BIT && TARGET_PECOFF)
13673 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13674 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13677 reg = gen_reg_rtx (Pmode);
13678 emit_move_insn (reg, new_rtx);
13681 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
/* 64-bit small/medium PIC: load the address through a @GOTPCREL slot.  */
13683 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13684 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13685 new_rtx = gen_const_mem (Pmode, new_rtx);
13686 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13689 reg = gen_reg_rtx (Pmode);
13690 /* Use directly gen_movsi, otherwise the address is loaded
13691 into register for CSE. We don't want to CSE this addresses,
13692 instead we CSE addresses from the GOT table, so skip this. */
13693 emit_insn (gen_movsi (reg, new_rtx));
13698 /* This symbol must be referenced via a load from the
13699 Global Offset Table (@GOT). */
13701 set_pic_reg_ever_live ();
13702 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13703 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13705 new_rtx = force_reg (Pmode, new_rtx);
13706 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13707 new_rtx = gen_const_mem (Pmode, new_rtx);
13708 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13711 reg = gen_reg_rtx (Pmode);
13712 emit_move_insn (reg, new_rtx);
/* Non-symbolic constants and composite CONST expressions.  */
13718 if (CONST_INT_P (addr)
13719 && !x86_64_immediate_operand (addr, VOIDmode))
13723 emit_move_insn (reg, addr);
13727 new_rtx = force_reg (Pmode, addr);
13729 else if (GET_CODE (addr) == CONST)
13731 addr = XEXP (addr, 0);
13733 /* We must match stuff we generate before. Assume the only
13734 unspecs that can get here are ours. Not that we could do
13735 anything with them anyway.... */
13736 if (GET_CODE (addr) == UNSPEC
13737 || (GET_CODE (addr) == PLUS
13738 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13740 gcc_assert (GET_CODE (addr) == PLUS);
13742 if (GET_CODE (addr) == PLUS)
13744 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13746 /* Check first to see if this is a constant offset from a @GOTOFF
13747 symbol reference. */
13748 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13749 && CONST_INT_P (op1))
13753 set_pic_reg_ever_live ();
13754 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13756 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13757 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13758 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13762 emit_move_insn (reg, new_rtx);
/* Offsets outside the +/-16MB window cannot stay folded into the
   displacement; force them into registers instead.  */
13768 if (INTVAL (op1) < -16*1024*1024
13769 || INTVAL (op1) >= 16*1024*1024)
13771 if (!x86_64_immediate_operand (op1, Pmode))
13772 op1 = force_reg (Pmode, op1);
13773 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize both halves recursively, then recombine.  */
13779 rtx base = legitimize_pic_address (op0, reg);
13780 machine_mode mode = GET_MODE (base);
13782 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13784 if (CONST_INT_P (new_rtx))
13786 if (INTVAL (new_rtx) < -16*1024*1024
13787 || INTVAL (new_rtx) >= 16*1024*1024)
13789 if (!x86_64_immediate_operand (new_rtx, mode))
13790 new_rtx = force_reg (mode, new_rtx);
13792 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13795 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13799 if (GET_CODE (new_rtx) == PLUS
13800 && CONSTANT_P (XEXP (new_rtx, 1)))
13802 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13803 new_rtx = XEXP (new_rtx, 1);
13805 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13813 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13816 get_thread_pointer (machine_mode tp_mode, bool to_reg)
13818 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
/* The only allowed mode widening is SImode -> DImode (x32), done via
   zero extension.  */
13820 if (GET_MODE (tp) != tp_mode)
13822 gcc_assert (GET_MODE (tp) == SImode);
13823 gcc_assert (tp_mode == DImode);
13825 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13829 tp = copy_to_mode_reg (tp_mode, tp);
13834 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13836 static GTY(()) rtx ix86_tls_symbol;
13839 ix86_tls_get_addr (void)
13841 if (!ix86_tls_symbol)
/* GNU TLS on 32-bit uses the triple-underscore entry point.  */
13844 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13845 ? "___tls_get_addr" : "__tls_get_addr");
13847 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13850 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13852 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13854 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13855 gen_rtx_CONST (Pmode, unspec));
13858 return ix86_tls_symbol;
13861 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13863 static GTY(()) rtx ix86_tls_module_base_symbol;
13866 ix86_tls_module_base (void)
/* Created lazily and cached for the rest of compilation (GC-rooted).  */
13868 if (!ix86_tls_module_base_symbol)
13870 ix86_tls_module_base_symbol
13871 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13873 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13874 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13877 return ix86_tls_module_base_symbol;
13880 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13881 false if we expect this to be used for a memory address and true if
13882 we expect to load the address into a register. */
13885 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13887 rtx dest, base, off;
13888 rtx pic = NULL_RTX, tp = NULL_RTX;
13889 machine_mode tp_mode = Pmode;
13892 /* Fall back to global dynamic model if tool chain cannot support local
13894 if (TARGET_SUN_TLS && !TARGET_64BIT
13895 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13896 && model == TLS_MODEL_LOCAL_DYNAMIC)
13897 model = TLS_MODEL_GLOBAL_DYNAMIC;
13901 case TLS_MODEL_GLOBAL_DYNAMIC:
13902 dest = gen_reg_rtx (Pmode);
13906 if (flag_pic && !TARGET_PECOFF)
13907 pic = pic_offset_table_rtx;
13910 pic = gen_reg_rtx (Pmode);
13911 emit_insn (gen_set_got (pic));
/* GNU2 (TLSDESC) sequence: descriptor call plus thread pointer add.  */
13915 if (TARGET_GNU2_TLS)
13918 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13920 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13922 tp = get_thread_pointer (Pmode, true);
13923 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13925 if (GET_MODE (x) != Pmode)
13926 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13928 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
/* Classic GD sequence: call __tls_get_addr, result in %rax.  */
13932 rtx caddr = ix86_tls_get_addr ();
13936 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13941 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13942 insns = get_insns ();
13945 if (GET_MODE (x) != Pmode)
13946 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13948 RTL_CONST_CALL_P (insns) = 1;
13949 emit_libcall_block (insns, dest, rax, x);
13952 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13956 case TLS_MODEL_LOCAL_DYNAMIC:
13957 base = gen_reg_rtx (Pmode);
13962 pic = pic_offset_table_rtx;
13965 pic = gen_reg_rtx (Pmode);
13966 emit_insn (gen_set_got (pic));
13970 if (TARGET_GNU2_TLS)
13972 rtx tmp = ix86_tls_module_base ();
13975 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13977 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13979 tp = get_thread_pointer (Pmode, true);
13980 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13981 gen_rtx_MINUS (Pmode, tmp, tp));
13985 rtx caddr = ix86_tls_get_addr ();
13989 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13995 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13996 insns = get_insns ();
13999 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
14000 share the LD_BASE result with other LD model accesses. */
14001 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
14002 UNSPEC_TLS_LD_BASE);
14004 RTL_CONST_CALL_P (insns) = 1;
14005 emit_libcall_block (insns, base, rax, eqv);
14008 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
/* Local-dynamic result: module base plus @DTPOFF offset of X.  */
14011 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
14012 off = gen_rtx_CONST (Pmode, off);
14014 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
14016 if (TARGET_GNU2_TLS)
14018 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
14020 if (GET_MODE (x) != Pmode)
14021 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14023 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14027 case TLS_MODEL_INITIAL_EXEC:
14030 if (TARGET_SUN_TLS && !TARGET_X32)
14032 /* The Sun linker took the AMD64 TLS spec literally
14033 and can only handle %rax as destination of the
14034 initial executable code sequence. */
14036 dest = gen_reg_rtx (DImode);
14037 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
14041 /* Generate DImode references to avoid %fs:(%reg32)
14042 problems and linker IE->LE relaxation bug. */
14045 type = UNSPEC_GOTNTPOFF;
14049 set_pic_reg_ever_live ();
14050 pic = pic_offset_table_rtx;
14051 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
14053 else if (!TARGET_ANY_GNU_TLS)
14055 pic = gen_reg_rtx (Pmode);
14056 emit_insn (gen_set_got (pic));
14057 type = UNSPEC_GOTTPOFF;
14062 type = UNSPEC_INDNTPOFF;
/* Load the TP offset from its GOT slot.  */
14065 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
14066 off = gen_rtx_CONST (tp_mode, off);
14068 off = gen_rtx_PLUS (tp_mode, pic, off);
14069 off = gen_const_mem (tp_mode, off);
14070 set_mem_alias_set (off, ix86_GOT_alias_set ());
14072 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14074 base = get_thread_pointer (tp_mode,
14075 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14076 off = force_reg (tp_mode, off);
14077 return gen_rtx_PLUS (tp_mode, base, off);
14081 base = get_thread_pointer (Pmode, true);
14082 dest = gen_reg_rtx (Pmode);
14083 emit_insn (ix86_gen_sub3 (dest, base, off));
14087 case TLS_MODEL_LOCAL_EXEC:
14088 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14089 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14090 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14091 off = gen_rtx_CONST (Pmode, off);
14093 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14095 base = get_thread_pointer (Pmode,
14096 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14097 return gen_rtx_PLUS (Pmode, base, off);
14101 base = get_thread_pointer (Pmode, true);
14102 dest = gen_reg_rtx (Pmode);
14103 emit_insn (ix86_gen_sub3 (dest, base, off));
14108 gcc_unreachable ();
14114 /* Create or return the unique __imp_DECL dllimport symbol corresponding
14115 to symbol DECL if BEIMPORT is true. Otherwise create or return the
14116 unique refptr-DECL symbol corresponding to symbol DECL. */
14118 struct dllimport_hasher : ggc_cache_hasher<tree_map *>
/* Hash/equality operate on the source DECL pointer stored in the map.  */
14120 static inline hashval_t hash (tree_map *m) { return m->hash; }
14122 equal (tree_map *a, tree_map *b)
14124 return a->base.from == b->base.from;
14128 handle_cache_entry (tree_map *&m)
14130 extern void gt_ggc_mx (tree_map *&);
14131 if (m == HTAB_EMPTY_ENTRY || m == HTAB_DELETED_ENTRY)
/* Entries whose source DECL is no longer GC-marked are dropped.  */
14133 else if (ggc_marked_p (m->base.from))
14136 m = static_cast<tree_map *> (HTAB_DELETED_ENTRY);
14140 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
14143 get_dllimport_decl (tree decl, bool beimport)
14145 struct tree_map *h, in;
14147 const char *prefix;
14148 size_t namelen, prefixlen;
14153 if (!dllimport_map)
14154 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
/* Look up (or insert) the cache entry keyed by DECL's pointer.  */
14156 in.hash = htab_hash_pointer (decl);
14157 in.base.from = decl;
14158 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
/* Cache miss: build an artificial read-only external VAR_DECL holding
   the pointer, named "__imp_..." or "refptr..." per BEIMPORT.  */
14163 *loc = h = ggc_alloc<tree_map> ();
14165 h->base.from = decl;
14166 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14167 VAR_DECL, NULL, ptr_type_node);
14168 DECL_ARTIFICIAL (to) = 1;
14169 DECL_IGNORED_P (to) = 1;
14170 DECL_EXTERNAL (to) = 1;
14171 TREE_READONLY (to) = 1;
14173 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14174 name = targetm.strip_name_encoding (name);
14176 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14177 ? "*__imp_" : "*__imp__";
14179 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
14180 namelen = strlen (name);
14181 prefixlen = strlen (prefix);
14182 imp_name = (char *) alloca (namelen + prefixlen + 1);
14183 memcpy (imp_name, prefix, prefixlen);
14184 memcpy (imp_name + prefixlen, name, namelen + 1);
14186 name = ggc_alloc_string (imp_name, namelen + prefixlen);
14187 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14188 SET_SYMBOL_REF_DECL (rtl, to);
14189 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14192 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14193 #ifdef SUB_TARGET_RECORD_STUB
14194 SUB_TARGET_RECORD_STUB (name);
/* The decl's RTL is a const mem load of the pointer, aliased with the
   GOT alias set.  */
14198 rtl = gen_const_mem (Pmode, rtl);
14199 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14201 SET_DECL_RTL (to, rtl);
14202 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14207 /* Expand SYMBOL into its corresponding far-addresse symbol.
14208 WANT_REG is true if we require the result be a register. */
14211 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
/* A refptr stub only makes sense for symbols that carry a decl.  */
14216 gcc_assert (SYMBOL_REF_DECL (symbol));
14217 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14219 x = DECL_RTL (imp_decl);
14221 x = force_reg (Pmode, x);
14225 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
14226 true if we require the result be a register. */
14229 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
/* Same shape as legitimize_pe_coff_extern_decl, but requests the
   __imp_ variant (beimport == true).  */
14234 gcc_assert (SYMBOL_REF_DECL (symbol));
14235 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14237 x = DECL_RTL (imp_decl);
14239 x = force_reg (Pmode, x);
14243 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
14244 is true if we require the result be a register. */
14247 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14249 if (!TARGET_PECOFF)
14252 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
/* Handle a bare dllimport symbol, or symbol+offset inside a CONST.  */
14254 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14255 return legitimize_dllimport_symbol (addr, inreg);
14256 if (GET_CODE (addr) == CONST
14257 && GET_CODE (XEXP (addr, 0)) == PLUS
14258 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14259 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14261 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14262 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
/* Outside large/medium code models, external symbols with decls go
   through a refptr stub instead.  */
14266 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14268 if (GET_CODE (addr) == SYMBOL_REF
14269 && !is_imported_p (addr)
14270 && SYMBOL_REF_EXTERNAL_P (addr)
14271 && SYMBOL_REF_DECL (addr))
14272 return legitimize_pe_coff_extern_decl (addr, inreg);
14274 if (GET_CODE (addr) == CONST
14275 && GET_CODE (XEXP (addr, 0)) == PLUS
14276 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14277 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14278 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14279 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14281 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14282 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14287 /* Try machine-dependent ways of modifying an illegitimate address
14288 to be legitimate. If we find one, return the new, valid address.
14289 This macro is used in only one place: `memory_address' in explow.c.
14291 OLDX is the address as it was before break_out_memory_refs was called.
14292 In some cases it is useful to look at this to decide what needs to be done.
14294 It is always safe for this macro to do nothing. It exists to recognize
14295 opportunities to optimize the output.
14297 For the 80386, we handle X+REG by loading X into a register R and
14298 using R+REG. R will go in a general reg and indexing will be used.
14299 However, if REG is a broken-out memory address or multiplication,
14300 nothing needs to be done because REG can certainly go in a general reg.
14302 When -fpic is used, special handling is needed for symbolic references.
14303 See comments by legitimize_pic_address in i386.c for details. */
14306 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14308 bool changed = false;
/* TLS symbols (bare or symbol+offset) go straight to the TLS path.  */
14311 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14313 return legitimize_tls_address (x, (enum tls_model) log, false);
14314 if (GET_CODE (x) == CONST
14315 && GET_CODE (XEXP (x, 0)) == PLUS
14316 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14317 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14319 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14320 (enum tls_model) log, false);
14321 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
14324 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14326 rtx tmp = legitimize_pe_coff_symbol (x, true);
14331 if (flag_pic && SYMBOLIC_CONST (x))
14332 return legitimize_pic_address (x, 0);
14335 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14336 return machopic_indirect_data_reference (x, 0);
14339 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
14340 if (GET_CODE (x) == ASHIFT
14341 && CONST_INT_P (XEXP (x, 1))
14342 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14345 log = INTVAL (XEXP (x, 1));
14346 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14347 GEN_INT (1 << log));
14350 if (GET_CODE (x) == PLUS)
14352 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14354 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14355 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14356 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14359 log = INTVAL (XEXP (XEXP (x, 0), 1));
14360 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14361 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14362 GEN_INT (1 << log));
14365 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14366 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14367 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14370 log = INTVAL (XEXP (XEXP (x, 1), 1));
14371 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14372 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14373 GEN_INT (1 << log));
14376 /* Put multiply first if it isn't already. */
14377 if (GET_CODE (XEXP (x, 1)) == MULT)
14379 std::swap (XEXP (x, 0), XEXP (x, 1));
14383 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14384 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14385 created by virtual register instantiation, register elimination, and
14386 similar optimizations. */
14387 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14390 x = gen_rtx_PLUS (Pmode,
14391 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14392 XEXP (XEXP (x, 1), 0)),
14393 XEXP (XEXP (x, 1), 1));
14397 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14398 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14399 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14400 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14401 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14402 && CONSTANT_P (XEXP (x, 1)))
14405 rtx other = NULL_RTX;
/* Pick whichever of the two constants is the CONST_INT; the other
   operand is folded with plus_constant below.  */
14407 if (CONST_INT_P (XEXP (x, 1)))
14409 constant = XEXP (x, 1);
14410 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14412 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14414 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14415 other = XEXP (x, 1);
14423 x = gen_rtx_PLUS (Pmode,
14424 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14425 XEXP (XEXP (x, 0), 1, 0)),
14426 plus_constant (Pmode, other,
14427 INTVAL (constant)));
14431 if (changed && ix86_legitimate_address_p (mode, x, false))
/* Force any remaining MULT operand into a register so the address
   matches base+index*scale form.  */
14434 if (GET_CODE (XEXP (x, 0)) == MULT)
14437 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14440 if (GET_CODE (XEXP (x, 1)) == MULT)
14443 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14447 && REG_P (XEXP (x, 1))
14448 && REG_P (XEXP (x, 0)))
14451 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14454 x = legitimize_pic_address (x, 0);
14457 if (changed && ix86_legitimate_address_p (mode, x, false))
/* Last resort: force the non-register operand into a fresh pseudo.  */
14460 if (REG_P (XEXP (x, 0)))
14462 rtx temp = gen_reg_rtx (Pmode);
14463 rtx val = force_operand (XEXP (x, 1), temp);
14466 val = convert_to_mode (Pmode, val, 1);
14467 emit_move_insn (temp, val);
14470 XEXP (x, 1) = temp;
14474 else if (REG_P (XEXP (x, 1)))
14476 rtx temp = gen_reg_rtx (Pmode);
14477 rtx val = force_operand (XEXP (x, 0), temp);
14480 val = convert_to_mode (Pmode, val, 1);
14481 emit_move_insn (temp, val);
14484 XEXP (x, 0) = temp;
14492 /* Print an integer constant expression in assembler syntax. Addition
14493 and subtraction are the only arithmetic that may appear in these
14494 expressions. FILE is the stdio stream to write to, X is the rtx, and
14495 CODE is the operand print code from the output string. */
/* NOTE(review): this extract omits interleaved lines (case labels,
   braces) of the original function; the visible tokens are preserved
   byte-for-byte and comments only are added.  Dispatches on the rtx
   code of X and prints it in assembler syntax, appending the PIC/TLS
   relocation suffix selected by the wrapping UNSPEC where present.  */
14498 output_pic_addr_const (FILE *file, rtx x, int code)
14502   switch (GET_CODE (x))
/* PC-relative reference: only meaningful when generating PIC.  */
14505       gcc_assert (flag_pic);
14510       if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14511 	output_addr_const (file, x);
14514 	  const char *name = XSTR (x, 0);
14516 	  /* Mark the decl as referenced so that cgraph will
14517 	     output the function. */
14518 	  if (SYMBOL_REF_DECL (x))
14519 	    mark_decl_referenced (SYMBOL_REF_DECL (x));
/* Darwin indirect calls go through a stub; substitute the stub name.  */
14522 	  if (MACHOPIC_INDIRECT
14523 	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14524 	    name = machopic_indirection_name (x, /*stub_p=*/true);
14526 	  assemble_name (file, name);
/* Operand code 'P' requests a PLT reference for non-local symbols
   (ELF targets only — not Mach-O, not 64-bit PE-COFF).  */
14528       if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14529 	  && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14530 	fputs ("@PLT", file);
14537       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14538       assemble_name (asm_out_file, buf);
14542       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14546       /* This used to output parentheses around the expression,
14547 	 but that does not work on the 386 (either ATT or BSD assembler). */
14548       output_pic_addr_const (file, XEXP (x, 0), code);
/* VOIDmode CONST_DOUBLE is a wide integer constant, not a float.  */
14552       if (GET_MODE (x) == VOIDmode)
14554 	  /* We can use %d if the number is <32 bits and positive. */
14555 	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14556 	    fprintf (file, "0x%lx%08lx",
14557 		     (unsigned long) CONST_DOUBLE_HIGH (x),
14558 		     (unsigned long) CONST_DOUBLE_LOW (x));
14560 	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14563 	/* We can't handle floating point constants;
14564 	   TARGET_PRINT_OPERAND must handle them. */
14565 	output_operand_lossage ("floating constant misused");
14569       /* Some assemblers need integer constants to appear first. */
14570       if (CONST_INT_P (XEXP (x, 0)))
14572 	  output_pic_addr_const (file, XEXP (x, 0), code);
14574 	  output_pic_addr_const (file, XEXP (x, 1), code);
/* MINUS: the subtrahend must be a constant; the bracket character
   follows the assembler dialect (Intel '(' vs AT&T '[').  */
14578       gcc_assert (CONST_INT_P (XEXP (x, 1)));
14579       output_pic_addr_const (file, XEXP (x, 1), code);
14581       output_pic_addr_const (file, XEXP (x, 0), code);
14587       putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14588       output_pic_addr_const (file, XEXP (x, 0), code);
14590       output_pic_addr_const (file, XEXP (x, 1), code);
14592       putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14596       if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14598 	  bool f = i386_asm_output_addr_const_extra (file, x);
/* All other PIC/TLS unspecs wrap exactly one operand; print it, then
   append the matching relocation suffix below.  */
14603       gcc_assert (XVECLEN (x, 0) == 1);
14604       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14605       switch (XINT (x, 1))
14608 	  fputs ("@GOT", file);
14610 	case UNSPEC_GOTOFF:
14611 	  fputs ("@GOTOFF", file);
14613 	case UNSPEC_PLTOFF:
14614 	  fputs ("@PLTOFF", file);
14617 	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14618 		 "(%rip)" : "[rip]", file);
14620 	case UNSPEC_GOTPCREL:
14621 	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14622 		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14624 	case UNSPEC_GOTTPOFF:
14625 	  /* FIXME: This might be @TPOFF in Sun ld too. */
14626 	  fputs ("@gottpoff", file);
14629 	  fputs ("@tpoff", file);
/* NOTE(review): NTPOFF prints either @tpoff or @ntpoff; the selecting
   condition (presumably TARGET_64BIT) is on a line missing from this
   extract — confirm against the full source.  */
14631 	case UNSPEC_NTPOFF:
14633 	    fputs ("@tpoff", file);
14635 	    fputs ("@ntpoff", file);
14637 	case UNSPEC_DTPOFF:
14638 	  fputs ("@dtpoff", file);
14640 	case UNSPEC_GOTNTPOFF:
14642 	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14643 		   "@gottpoff(%rip)": "@gottpoff[rip]", file);
14645 	    fputs ("@gotntpoff", file);
14647 	case UNSPEC_INDNTPOFF:
14648 	  fputs ("@indntpoff", file);
14651 	case UNSPEC_MACHOPIC_OFFSET:
14653 	  machopic_output_function_base_name (file);
14657 	  output_operand_lossage ("invalid UNSPEC as operand");
14663       output_operand_lossage ("invalid expression as operand");
14667 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14668 We need to emit DTP-relative relocations. */
/* Emit X as a DTP-relative (dynamic thread pointer relative) value for
   DWARF debug info, i.e. "<directive> X@dtpoff".  NOTE(review): the
   switch on SIZE is partly missing from this extract; the ", 0" is
   presumably the upper-half padding for the 8-byte case, and an
   unsupported SIZE aborts — confirm against the full source.  */
14670 static void ATTRIBUTE_UNUSED
14671 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14673       fputs (ASM_LONG, file);
14674   output_addr_const (file, x);
14675   fputs ("@dtpoff", file);
14681 	fputs (", 0", file);
14684       gcc_unreachable ();
14688 /* Return true if X is a representation of the PIC register. This copes
14689 with calls from ix86_find_base_term, where the register might have
14690 been replaced by a cselib value. */
/* Return true iff X represents the PIC register.  Handles three forms:
   a cselib VALUE standing in for the register, a pseudo equal to
   pic_offset_table_rtx (by regno, or a hard reg whose ORIGINAL_REGNO
   matches the pseudo), and — when no pic_offset_table_rtx exists —
   the fixed PIC_OFFSET_TABLE_REGNUM hard register.  */
14693 ix86_pic_register_p (rtx x)
14695   if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14696     return (pic_offset_table_rtx
14697 	    && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14698   else if (!REG_P (x))
14700   else if (pic_offset_table_rtx)
14702       if (REGNO (x) == REGNO (pic_offset_table_rtx))
/* After reload the pseudo PIC reg may have been assigned a hard reg;
   recognize that hard reg via its original (pre-allocation) regno.  */
14704       if (HARD_REGISTER_P (x)
14705 	  && !HARD_REGISTER_P (pic_offset_table_rtx)
14706 	  && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14711     return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14714 /* Helper function for ix86_delegitimize_address.
14715 Attempt to delegitimize TLS local-exec accesses. */
/* Undo TLS local-exec legitimization: recognize an address of the form
   %fs/%gs-segment + CONST (UNSPEC_NTPOFF [symbol] [+ offset]) and
   rebuild the plain symbol (+ base/index/offset) expression for debug
   output.  Returns ORIG_X unchanged when the pattern does not match
   (the early-return paths fall through lines missing in this extract).  */
14718 ix86_delegitimize_tls_address (rtx orig_x)
14720   rtx x = orig_x, unspec;
14721   struct ix86_address addr;
/* Without direct seg refs, le TLS addresses never look like this.  */
14723   if (!TARGET_TLS_DIRECT_SEG_REFS)
14727   if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14729   if (ix86_decompose_address (x, &addr) == 0
14730       || addr.seg != DEFAULT_TLS_SEG_REG
14731       || addr.disp == NULL_RTX
14732       || GET_CODE (addr.disp) != CONST)
14734   unspec = XEXP (addr.disp, 0);
/* Strip an outer "+ const_int" to reach the UNSPEC itself.  */
14735   if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14736     unspec = XEXP (unspec, 0);
14737   if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14739   x = XVECEXP (unspec, 0, 0);
14740   gcc_assert (GET_CODE (x) == SYMBOL_REF);
/* Re-attach the constant offset that was stripped above, if any.  */
14741   if (unspec != XEXP (addr.disp, 0))
14742     x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
/* Rebuild index*scale and base terms around the bare symbol.  */
14745       rtx idx = addr.index;
14746       if (addr.scale != 1)
14747 	idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14748       x = gen_rtx_PLUS (Pmode, idx, x);
14751     x = gen_rtx_PLUS (Pmode, addr.base, x);
/* Preserve MEM attributes of the original when possible.  */
14752   if (MEM_P (orig_x))
14753     x = replace_equiv_address_nv (orig_x, x);
14757 /* In the name of slightly smaller debug output, and to cater to
14758 general assembler lossage, recognize PIC+GOTOFF and turn it back
14759 into a direct symbol reference.
14761 On Darwin, this is necessary to avoid a crash, because Darwin
14762 has a different PIC label for each routine but the DWARF debugging
14763 information is not associated with any particular routine, so it's
14764 necessary to remove references to the PIC label from RTL stored by
14765 the DWARF output code. */
/* TARGET_DELEGITIMIZE_ADDRESS: strip PIC/GOT legitimization from X and
   return a plain symbolic address for debug-info purposes.  Recognizes
   UNSPEC_PCREL/GOTPCREL wrappers (64-bit), %ebx+GOT/GOTOFF forms
   (32-bit and medium/large PIC), and falls back to TLS delegitimization.
   NOTE(review): several closing/else lines are missing from this
   extract; comments below describe only the visible logic.  */
14768 ix86_delegitimize_address (rtx x)
14770   rtx orig_x = delegitimize_mem_from_attrs (x);
14771   /* addend is NULL or some rtx if x is something+GOTOFF where
14772      something doesn't include the PIC register. */
14773   rtx addend = NULL_RTX;
14774   /* reg_addend is NULL or a multiple of some register. */
14775   rtx reg_addend = NULL_RTX;
14776   /* const_addend is NULL or a const_int. */
14777   rtx const_addend = NULL_RTX;
14778   /* This is the result, or NULL. */
14779   rtx result = NULL_RTX;
/* (const (plus (unspec [sym] PCREL) off)) -> off + sym.  */
14788       if (GET_CODE (x) == CONST
14789 	  && GET_CODE (XEXP (x, 0)) == PLUS
14790 	  && GET_MODE (XEXP (x, 0)) == Pmode
14791 	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14792 	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14793 	  && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14795 	  rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14796 	  x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14797 	  if (MEM_P (orig_x))
14798 	    x = replace_equiv_address_nv (orig_x, x);
/* Bare (const (unspec [sym] GOTPCREL/PCREL)) -> sym; GOTPCREL is only
   delegitimized for MEM contexts.  */
14802       if (GET_CODE (x) == CONST
14803 	  && GET_CODE (XEXP (x, 0)) == UNSPEC
14804 	  && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14805 	      || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14806 	  && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14808 	  x = XVECEXP (XEXP (x, 0), 0, 0);
14809 	  if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14811 	      x = simplify_gen_subreg (GET_MODE (orig_x), x,
/* Small-model 64-bit code never uses %ebx+GOTOFF; only TLS remains.  */
14819       if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14820 	return ix86_delegitimize_tls_address (orig_x);
14822       /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14823 	 and -mcmodel=medium -fpic.  */
14826   if (GET_CODE (x) != PLUS
14827       || GET_CODE (XEXP (x, 1)) != CONST)
14828     return ix86_delegitimize_tls_address (orig_x);
14830   if (ix86_pic_register_p (XEXP (x, 0)))
14831     /* %ebx + GOT/GOTOFF */
14833   else if (GET_CODE (XEXP (x, 0)) == PLUS)
14835       /* %ebx + %reg * scale + GOT/GOTOFF */
14836       reg_addend = XEXP (x, 0);
14837       if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14838 	reg_addend = XEXP (reg_addend, 1);
14839       else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14840 	reg_addend = XEXP (reg_addend, 0);
/* Neither side is the PIC reg: remember the whole non-constant part.  */
14843 	  reg_addend = NULL_RTX;
14844 	  addend = XEXP (x, 0);
14848       addend = XEXP (x, 0);
/* Peel the CONST wrapper; split off a trailing const_int if present.  */
14850   x = XEXP (XEXP (x, 1), 0);
14851   if (GET_CODE (x) == PLUS
14852       && CONST_INT_P (XEXP (x, 1)))
14854       const_addend = XEXP (x, 1);
/* Accept GOT (MEM loads only), GOTOFF (non-MEM), or PLTOFF (large
   model, non-MEM) unspecs; the wrapped element is the real symbol.  */
14858   if (GET_CODE (x) == UNSPEC
14859       && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14860 	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14861 	  || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14862 	      && !MEM_P (orig_x) && !addend)))
14863     result = XVECEXP (x, 0, 0);
14865   if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14866       && !MEM_P (orig_x))
14867     result = XVECEXP (x, 0, 0);
14870     return ix86_delegitimize_tls_address (orig_x);
/* Reassemble result = sym [+ const] [+ reg*scale] [+ addend-pic].  */
14873     result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14875     result = gen_rtx_PLUS (Pmode, reg_addend, result);
14878       /* If the rest of original X doesn't involve the PIC register, add
14879 	 addend and subtract pic_offset_table_rtx.  This can happen e.g.
14881 	 leal (%ebx, %ecx, 4), %ecx
14883 	 movl foo@GOTOFF(%ecx), %edx
14884 	 in which case we return (%ecx - %ebx) + foo
14885 	 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
14886 	 and reload has completed.  */
14887       if (pic_offset_table_rtx
14888 	  && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
14889 	result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14890 						     pic_offset_table_rtx),
14892       else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
14894 	  rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
14895 	  tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
14896 	  result = gen_rtx_PLUS (Pmode, tmp, result);
/* Narrow back to the original MEM's mode if it differed from Pmode.  */
14901   if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14903       result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14904       if (result == NULL_RTX)
14910 /* If X is a machine specific address (i.e. a symbol or label being
14911 referenced as a displacement from the GOT implemented using an
14912 UNSPEC), then return the base term. Otherwise return X. */
/* TARGET_FIND_BASE_TERM hook body: for a 64-bit (const (plus (unspec
   [sym] GOTPCREL/PCREL) off)) address, return the wrapped symbol as
   the base term; anything else is handed to
   ix86_delegitimize_address.  */
14915 ix86_find_base_term (rtx x)
14921       if (GET_CODE (x) != CONST)
14923       term = XEXP (x, 0);
14924       if (GET_CODE (term) == PLUS
14925 	  && (CONST_INT_P (XEXP (term, 1))
14926 	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14927 	term = XEXP (term, 0);
14928       if (GET_CODE (term) != UNSPEC
14929 	  || (XINT (term, 1) != UNSPEC_GOTPCREL
14930 	      && XINT (term, 1) != UNSPEC_PCREL))
14933       return XVECEXP (term, 0, 0);
14936   return ix86_delegitimize_address (x);
/* Write the condition-code suffix for CODE compared in MODE to FILE
   ("a", "b", "e", "p", ...).  REVERSE inverts the condition; FP selects
   the fcmov/x87 spelling of the suffix where it differs from the
   integer one.  NOTE(review): the switch's case labels are missing
   from this extract — only suffix-selection fragments are visible.  */
14940 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
14941 		    bool fp, FILE *file)
14943   const char *suffix;
/* FP compares were mapped onto integer CC; translate CODE first.  */
14945   if (mode == CCFPmode || mode == CCFPUmode)
14947       code = ix86_fp_compare_code_to_integer (code);
14951     code = reverse_condition (code);
14952 
15002       gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
15006       /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
15007 	 Those same assemblers have the same but opposite lossage on cmov.  */
15008       if (mode == CCmode)
15009 	suffix = fp ? "nbe" : "a";
15011 	gcc_unreachable ();
15027 	  gcc_unreachable ();
15031       if (mode == CCmode)
15033       else if (mode == CCCmode)
15034 	suffix = fp ? "b" : "c";
15036 	gcc_unreachable ();
15052 	  gcc_unreachable ();
15056       if (mode == CCmode)
15058       else if (mode == CCCmode)
15059 	suffix = fp ? "nb" : "nc";
15061 	gcc_unreachable ();
15064       gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
15068       if (mode == CCmode)
15071 	gcc_unreachable ();
/* UNORDERED / ORDERED map to parity flag ("p"/"np"; "u"/"nu" for fcmov).  */
15074       suffix = fp ? "u" : "p";
15077       suffix = fp ? "nu" : "np";
15080       gcc_unreachable ();
15082   fputs (suffix, file);
15085 /* Print the name of register X to FILE based on its machine mode and number.
15086 If CODE is 'w', pretend the mode is HImode.
15087 If CODE is 'b', pretend the mode is QImode.
15088 If CODE is 'k', pretend the mode is SImode.
15089 If CODE is 'q', pretend the mode is DImode.
15090 If CODE is 'x', pretend the mode is V4SFmode.
15091 If CODE is 't', pretend the mode is V8SFmode.
15092 If CODE is 'g', pretend the mode is V16SFmode.
15093 If CODE is 'h', pretend the reg is the 'high' byte register.
15094 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
15095 If CODE is 'd', duplicate the operand for AVX instruction.
/* Print register X to FILE under the size/part override CODE (see the
   preceding comment block for the letter meanings).  After the letter
   checks, CODE is reused as a byte width.  NOTE(review): this extract
   omits many lines (the assignments after each size-letter test and
   most case labels); visible tokens are preserved byte-for-byte.  */
15099 print_reg (rtx x, int code, FILE *file)
15102   unsigned int regno;
15103   bool duplicated = code == 'd' && TARGET_AVX;
/* AT&T syntax prefixes every register with '%'.  */
15105   if (ASSEMBLER_DIALECT == ASM_ATT)
15110       gcc_assert (TARGET_64BIT);
15111       fputs ("rip", file);
15115   regno = true_regnum (x);
/* These soft registers must have been eliminated before final.  */
15116   gcc_assert (regno != ARG_POINTER_REGNUM
15117 	      && regno != FRAME_POINTER_REGNUM
15118 	      && regno != FLAGS_REG
15119 	      && regno != FPSR_REG
15120 	      && regno != FPCR_REG);
/* Translate the override letter into an effective operand size.  */
15122   if (code == 'w' || MMX_REG_P (x))
15124   else if (code == 'b')
15126   else if (code == 'k')
15128   else if (code == 'q')
15130   else if (code == 'y')
15132   else if (code == 'h')
15134   else if (code == 'x')
15136   else if (code == 't')
15138   else if (code == 'g')
15141     code = GET_MODE_SIZE (GET_MODE (x));
15143   /* Irritatingly, AMD extended registers use different naming convention
15144      from the normal registers: "r%d[bwd]"  */
15145   if (REX_INT_REGNO_P (regno))
15147       gcc_assert (TARGET_64BIT);
15149       fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
15153 	  error ("extended registers have no high halves");
15168 	  error ("unsupported operand size for extended register");
15178       if (STACK_TOP_P (x))
/* General registers get an 'r'/'e' prefix by width; FP/mask/bound
   registers are named directly from the tables.  */
15187       if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x) && ! ANY_BND_REG_P (x))
15188 	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
15193       reg = hi_reg_name[regno];
15196       if (regno >= ARRAY_SIZE (qi_reg_name))
15198       reg = qi_reg_name[regno];
15201       if (regno >= ARRAY_SIZE (qi_high_reg_name))
15203       reg = qi_high_reg_name[regno];
/* Vector sizes: skip the leading size letter of the table name
   (e.g. "xmm0" printed from "ymm0"+1 style tables).  */
15208       gcc_assert (!duplicated);
15210       fputs (hi_reg_name[regno] + 1, file);
15216       gcc_assert (!duplicated);
15218       fputs (hi_reg_name[REGNO (x)] + 1, file);
15223       gcc_unreachable ();
/* 'd' with AVX prints the operand twice, comma-separated.  */
15229       if (ASSEMBLER_DIALECT == ASM_ATT)
15230 	fprintf (file, ", %%%s", reg);
15232 	fprintf (file, ", %s", reg);
15236 /* Meaning of CODE:
15237 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15238 C -- print opcode suffix for set/cmov insn.
15239 c -- like C, but print reversed condition
15240 F,f -- likewise, but for floating-point.
15241 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15243 R -- print embeded rounding and sae.
15244 r -- print only sae.
15245 z -- print the opcode suffix for the size of the current operand.
15246 Z -- likewise, with special suffixes for x87 instructions.
15247 * -- print a star (in certain assembler syntax)
15248 A -- print an absolute memory reference.
15249 E -- print address with DImode register names if TARGET_64BIT.
15250 w -- print the operand as if it's a "word" (HImode) even if it isn't.
15251 s -- print a shift double count, followed by the assemblers argument
15253 b -- print the QImode name of the register for the indicated operand.
15254 %b0 would print %al if operands[0] is reg 0.
15255 w -- likewise, print the HImode name of the register.
15256 k -- likewise, print the SImode name of the register.
15257 q -- likewise, print the DImode name of the register.
15258 x -- likewise, print the V4SFmode name of the register.
15259 t -- likewise, print the V8SFmode name of the register.
15260 g -- likewise, print the V16SFmode name of the register.
15261 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15262 y -- print "st(0)" instead of "st" as a register.
15263 d -- print duplicated register operand for AVX instruction.
15264 D -- print condition for SSE cmp instruction.
15265 P -- if PIC, print an @PLT suffix.
15266 p -- print raw symbol name.
15267 X -- don't print any sort of PIC '@' suffix for a symbol.
15268 & -- print some in-use local-dynamic symbol name.
15269 H -- print a memory address offset by 8; used for sse high-parts
15270 Y -- print condition for XOP pcom* instruction.
15271 + -- print a branch hint as 'cs' or 'ds' prefix
15272 ; -- print a semicolon (after prefixes due to bug in older gas).
15273 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15274 @ -- print a segment register of thread base pointer load
15275 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15276 ! -- print MPX prefix for jxx/call/ret instructions if required.
/* TARGET_PRINT_OPERAND: print operand X to FILE, interpreting the
   operand-code letter CODE documented in the comment block above.
   NOTE(review): this extract omits many interior lines (case labels,
   braces, fall-through paths); comments added here describe only what
   the visible lines establish, and tokens are preserved byte-for-byte.  */
15280 ix86_print_operand (FILE *file, rtx x, int code)
15287 	  switch (ASSEMBLER_DIALECT)
15294 	      /* Intel syntax. For absolute addresses, registers should not
15295 		 be surrounded by braces.  */
15299 		  ix86_print_operand (file, x, 0);
15306 	      gcc_unreachable ();
15309 	    ix86_print_operand (file, x, 0);
15313 	  /* Wrap address in an UNSPEC to declare special handling.  */
15315 	    x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15317 	  output_address (x);
/* Size-suffix letters (L/W/B/Q/S/T): emitted only in AT&T syntax;
   the actual putc of each suffix is on lines missing here.  */
15321 	  if (ASSEMBLER_DIALECT == ASM_ATT)
15326 	  if (ASSEMBLER_DIALECT == ASM_ATT)
15331 	  if (ASSEMBLER_DIALECT == ASM_ATT)
15336 	  if (ASSEMBLER_DIALECT == ASM_ATT)
15341 	  if (ASSEMBLER_DIALECT == ASM_ATT)
15346 	  if (ASSEMBLER_DIALECT == ASM_ATT)
/* 'O': Sun assembler cmov suffix ("w."/"l."/"q.") by operand size.  */
15351 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15352 	  if (ASSEMBLER_DIALECT != ASM_ATT)
15355 	      switch (GET_MODE_SIZE (GET_MODE (x)))
15370 		  output_operand_lossage
15371 		    ("invalid operand size for operand code 'O'");
/* 'z': generic opcode size suffix derived from the operand's mode.  */
15380 	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15382 	      /* Opcodes don't get size suffixes if using Intel opcodes.  */
15383 	      if (ASSEMBLER_DIALECT == ASM_INTEL)
15386 	      switch (GET_MODE_SIZE (GET_MODE (x)))
15405 		  output_operand_lossage
15406 		    ("invalid operand size for operand code 'z'");
15411 	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15413 	      (0, "non-integer operand used with operand code 'z'");
/* 'Z': x87-flavoured size suffixes (fild/fist forms).  */
15417 	  /* 387 opcodes don't get size suffixes if using Intel opcodes.  */
15418 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
15421 	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15423 	      switch (GET_MODE_SIZE (GET_MODE (x)))
15426 #ifdef HAVE_AS_IX86_FILDS
15436 #ifdef HAVE_AS_IX86_FILDQ
15439 		  fputs ("ll", file);
15447 	  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15449 	      /* 387 opcodes don't get size suffixes
15450 		 if the operands are registers.  */
15451 	      if (STACK_REG_P (x))
15454 	      switch (GET_MODE_SIZE (GET_MODE (x)))
15475 	      output_operand_lossage
15476 		("invalid operand type used with operand code 'Z'");
15480 	    output_operand_lossage
15481 	      ("invalid operand size for operand code 'Z'");
/* 's': shift-double count followed by the assembler's separator.  */
15500 	  if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15502 	      ix86_print_operand (file, x, 0);
15503 	      fputs (", ", file);
/* 'Y': XOP pcom* condition names.  */
15508 	  switch (GET_CODE (x))
15511 	      fputs ("neq", file);
15514 	      fputs ("eq", file);
15518 	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15522 	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15526 	      fputs ("le", file);
15530 	      fputs ("lt", file);
15533 	      fputs ("unord", file);
15536 	      fputs ("ord", file);
15539 	      fputs ("ueq", file);
15542 	      fputs ("nlt", file);
15545 	      fputs ("nle", file);
15548 	      fputs ("ule", file);
15551 	      fputs ("ult", file);
15554 	      fputs ("une", file);
15557 	      output_operand_lossage ("operand is not a condition code, "
15558 				      "invalid operand code 'Y'");
/* 'D': SSE cmp condition names (different vocabulary from fcmov).  */
15564 	  /* Little bit of braindamage here.  The SSE compare instructions
15565 	     does use completely different names for the comparisons that the
15566 	     fp conditional moves.  */
15567 	  switch (GET_CODE (x))
15572 		fputs ("eq_us", file);
15576 		fputs ("eq", file);
15581 		fputs ("nge", file);
15585 		fputs ("lt", file);
15590 		fputs ("ngt", file);
15594 		fputs ("le", file);
15597 	      fputs ("unord", file);
15602 		fputs ("neq_oq", file);
15606 		fputs ("neq", file);
15611 		fputs ("ge", file);
15615 		fputs ("nlt", file);
15620 		fputs ("gt", file);
15624 		fputs ("nle", file);
15627 	      fputs ("ord", file);
15630 	      output_operand_lossage ("operand is not a condition code, "
15631 				      "invalid operand code 'D'");
/* 'C'/'c'/'F'/'f': set/cmov condition suffixes via put_condition_code;
   lowercase letters reverse the condition, F/f select the FP form.  */
15638 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15639 	  if (ASSEMBLER_DIALECT == ASM_ATT)
15645 	  if (!COMPARISON_P (x))
15647 	      output_operand_lossage ("operand is not a condition code, "
15648 				      "invalid operand code '%c'", code);
15651 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15652 			      code == 'c' || code == 'f',
15653 			      code == 'F' || code == 'f',
/* 'H': address of the high 8 bytes of an offsettable memory operand.  */
15658 	  if (!offsettable_memref_p (x))
15660 	      output_operand_lossage ("operand is not an offsettable memory "
15661 				      "reference, invalid operand code 'H'");
15664 	  /* It doesn't actually matter what mode we use here, as we're
15665 	     only going to use this for printing.  */
15666 	  x = adjust_address_nv (x, DImode, 8);
15667 	  /* Output 'qword ptr' for intel assembler dialect.  */
15668 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
/* HLE prefixes: emitted as mnemonics with new-enough gas, otherwise as
   raw 0xf2/0xf3 prefix bytes.  */
15673 	  gcc_assert (CONST_INT_P (x));
15675 	  if (INTVAL (x) & IX86_HLE_ACQUIRE)
15676 #ifdef HAVE_AS_IX86_HLE
15677 	    fputs ("xacquire ", file);
15679 	    fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15681 	  else if (INTVAL (x) & IX86_HLE_RELEASE)
15682 #ifdef HAVE_AS_IX86_HLE
15683 	    fputs ("xrelease ", file);
15685 	    fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15687 	  /* We do not want to print value of the operand.  */
/* AVX-512 zero-masking marker.  */
15691 	  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15692 	    fputs ("{z}", file);
/* 'r': {sae} marker — operand must be exactly ROUND_SAE.  */
15696 	  gcc_assert (CONST_INT_P (x));
15697 	  gcc_assert (INTVAL (x) == ROUND_SAE);
15699 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
15700 	    fputs (", ", file);
15702 	  fputs ("{sae}", file);
15704 	  if (ASSEMBLER_DIALECT == ASM_ATT)
15705 	    fputs (", ", file);
/* 'R': embedded-rounding marker selected by the immediate.  */
15710 	  gcc_assert (CONST_INT_P (x));
15712 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
15713 	    fputs (", ", file);
15715 	  switch (INTVAL (x))
15717 	    case ROUND_NEAREST_INT | ROUND_SAE:
15718 	      fputs ("{rn-sae}", file);
15720 	    case ROUND_NEG_INF | ROUND_SAE:
15721 	      fputs ("{rd-sae}", file);
15723 	    case ROUND_POS_INF | ROUND_SAE:
15724 	      fputs ("{ru-sae}", file);
15726 	    case ROUND_ZERO | ROUND_SAE:
15727 	      fputs ("{rz-sae}", file);
15730 	      gcc_unreachable ();
15733 	  if (ASSEMBLER_DIALECT == ASM_ATT)
15734 	    fputs (", ", file);
15739 	  if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&': some local-dynamic TLS symbol name seen in this function.  */
15745 	    const char *name = get_some_local_dynamic_name ();
15747 	      output_operand_lossage ("'%%&' used without any "
15748 				      "local dynamic TLS references");
15750 	    assemble_name (file, name);
/* '+': branch hint prefixes from REG_BR_PROB, only when the static
   prediction disagrees with the CPU's forward/backward heuristic.  */
15759 	      || optimize_function_for_size_p (cfun)
15760 	      || !TARGET_BRANCH_PREDICTION_HINTS)
15763 	  x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15766 	      int pred_val = XINT (x, 0);
15768 	      if (pred_val < REG_BR_PROB_BASE * 45 / 100
15769 		  || pred_val > REG_BR_PROB_BASE * 55 / 100)
15771 		  bool taken = pred_val > REG_BR_PROB_BASE / 2;
15773 		    = final_forward_branch_p (current_output_insn) == 0;
15775 		  /* Emit hints only in the case default branch prediction
15776 		     heuristics would fail.  */
15777 		  if (taken != cputaken)
15779 		      /* We use 3e (DS) prefix for taken branches and
15780 			 2e (CS) prefix for not taken branches.  */
15782 			fputs ("ds ; ", file);
15784 			fputs ("cs ; ", file);
15792 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15798 	  if (ASSEMBLER_DIALECT == ASM_ATT)
/* '@': TLS segment register (%fs for 64-bit userland, %gs otherwise).  */
15801 	  /* The kernel uses a different segment register for performance
15802 	     reasons; a system call would not have to trash the userspace
15803 	     segment register, which would be expensive.  */
15804 	  if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15805 	    fputs ("fs", file);
15807 	    fputs ("gs", file);
15811 	  putc (TARGET_AVX2 ? 'i' : 'f', file);
15815 	  if (TARGET_64BIT && Pmode != word_mode)
15816 	    fputs ("addr32 ", file);
15820 	  if (ix86_bnd_prefixed_insn_p (current_output_insn))
15821 	    fputs ("bnd ", file);
15825 	  output_operand_lossage ("invalid operand code '%c'", code);
/* No (or consumed) operand code: print X itself by its rtx class.  */
15830     print_reg (x, code, file);
15832   else if (MEM_P (x))
15834       /* No `byte ptr' prefix for call instructions or BLKmode operands.  */
15835       if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15836 	  && GET_MODE (x) != BLKmode)
15839 	  switch (GET_MODE_SIZE (GET_MODE (x)))
15841 	    case 1: size = "BYTE"; break;
15842 	    case 2: size = "WORD"; break;
15843 	    case 4: size = "DWORD"; break;
15844 	    case 8: size = "QWORD"; break;
15845 	    case 12: size = "TBYTE"; break;
15847 	      if (GET_MODE (x) == XFmode)
15852 	    case 32: size = "YMMWORD"; break;
15853 	    case 64: size = "ZMMWORD"; break;
15855 	      gcc_unreachable ();
15858 	  /* Check for explicit size override (codes 'b', 'w', 'k',
15862 	  else if (code == 'w')
15864 	  else if (code == 'k')
15866 	  else if (code == 'q')
15868 	  else if (code == 'x')
15871 	  fputs (size, file);
15872 	  fputs (" PTR ", file);
15876       /* Avoid (%rip) for call operands.  */
15877       if (CONSTANT_ADDRESS_P (x) && code == 'P'
15878 	  && !CONST_INT_P (x))
15879 	output_addr_const (file, x);
15880       else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15881 	output_operand_lossage ("invalid constraints for operand");
15883 	output_address (x);
/* Float immediates: print as hex bit patterns (AT&T prefixes '$').  */
15886   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15891       REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15892       REAL_VALUE_TO_TARGET_SINGLE (r, l);
15894       if (ASSEMBLER_DIALECT == ASM_ATT)
15896       /* Sign extend 32bit SFmode immediate to 8 bytes.  */
15898 	fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15899 		 (unsigned long long) (int) l);
15901 	fprintf (file, "0x%08x", (unsigned int) l);
15904   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15909       REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15910       REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15912       if (ASSEMBLER_DIALECT == ASM_ATT)
15914       fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15917   /* These float cases don't actually occur as immediate operands.  */
15918   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15922       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15923       fputs (dstr, file);
15928       /* We have patterns that allow zero sets of memory, for instance.
15929 	 In 64-bit mode, we should probably support all 8-byte vectors,
15930 	 since we can in fact encode that into an immediate.  */
15931       if (GET_CODE (x) == CONST_VECTOR)
15933 	  gcc_assert (x == CONST0_RTX (GET_MODE (x)));
/* 'P'/'p' suppress the immediate markers; otherwise AT&T gets '$'
   and Intel gets "OFFSET FLAT:" for symbolic constants.  */
15937       if (code != 'P' && code != 'p')
15939 	  if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15941 	      if (ASSEMBLER_DIALECT == ASM_ATT)
15944 	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15945 		   || GET_CODE (x) == LABEL_REF)
15947 	      if (ASSEMBLER_DIALECT == ASM_ATT)
15950 		fputs ("OFFSET FLAT:", file);
15953       if (CONST_INT_P (x))
15954 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15955       else if (flag_pic || MACHOPIC_INDIRECT)
15956 	output_pic_addr_const (file, x, code);
15958 	output_addr_const (file, x);
/* TARGET_PRINT_OPERAND_PUNCT_VALID_P: return true for the punctuation
   operand codes ix86_print_operand accepts without an operand
   (@ * + & ; ~ ^ !, documented in the table above ix86_print_operand).  */
15963 ix86_print_operand_punct_valid_p (unsigned char code)
15965   return (code == '@' || code == '*' || code == '+' || code == '&'
15966 	  || code == ';' || code == '~' || code == '^' || code == '!');
15969 /* Print a memory operand whose address is ADDR. */
/* Print the memory address ADDR in the current assembler dialect:
   unwrap VSIB/LEA/BNDMK/BNDLDX unspecs into base/index/scale/disp via
   ix86_decompose_address, then emit AT&T "disp(base,index,scale)" or
   Intel "[base+index*scale+disp]" form.  NOTE(review): this extract
   omits interior lines (declarations, braces, Intel-branch openers);
   visible tokens are preserved byte-for-byte.  */
15972 ix86_print_operand_address (FILE *file, rtx addr)
15974   struct ix86_address parts;
15975   rtx base, index, disp;
/* VSIB gather/scatter: real index register is the unspec's 2nd elt.  */
15981   if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15983       ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15984       gcc_assert (parts.index == NULL_RTX);
15985       parts.index = XVECEXP (addr, 0, 1);
15986       parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15987       addr = XVECEXP (addr, 0, 0);
15990   else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15992       gcc_assert (TARGET_64BIT);
15993       ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
/* MPX bndmk: fold the decomposed base into the index slot and take the
   unspec's first element as the base.  */
15996   else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
15998       ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
15999       gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
16000       if (parts.base != NULL_RTX)
16002 	  parts.index = parts.base;
16005       parts.base = XVECEXP (addr, 0, 0);
16006       addr = XVECEXP (addr, 0, 0);
16008   else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
16010       ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16011       gcc_assert (parts.index == NULL_RTX);
16012       parts.index = XVECEXP (addr, 0, 1);
16013       addr = XVECEXP (addr, 0, 0);
16016     ok = ix86_decompose_address (addr, &parts);
16021   index = parts.index;
16023   scale = parts.scale;
/* Explicit segment override (fs:/gs:) before the address proper.  */
16031       if (ASSEMBLER_DIALECT == ASM_ATT)
16033       fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
16036       gcc_unreachable ();
16039   /* Use one byte shorter RIP relative addressing for 64bit mode.  */
16040   if (TARGET_64BIT && !base && !index)
16044       if (GET_CODE (disp) == CONST
16045 	  && GET_CODE (XEXP (disp, 0)) == PLUS
16046 	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16047 	symbol = XEXP (XEXP (disp, 0), 0);
/* Only non-TLS symbols/labels may become RIP-relative here.  */
16049       if (GET_CODE (symbol) == LABEL_REF
16050 	  || (GET_CODE (symbol) == SYMBOL_REF
16051 	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))
16054   if (!base && !index)
16056       /* Displacement only requires special attention.  */
16058       if (CONST_INT_P (disp))
16060 	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
16061 	    fputs ("ds:", file);
16062 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
16065 	output_pic_addr_const (file, disp, 0);
16067 	output_addr_const (file, disp);
16071       /* Print SImode register names to force addr32 prefix.  */
16072       if (SImode_address_operand (addr, VOIDmode))
16074 #ifdef ENABLE_CHECKING
16075 	  gcc_assert (TARGET_64BIT);
16076 	  switch (GET_CODE (addr))
16079 	      gcc_assert (GET_MODE (addr) == SImode);
16080 	      gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
16084 	      gcc_assert (GET_MODE (addr) == DImode);
16087 	      gcc_unreachable ();
16090 	  gcc_assert (!code);
16096 	       && CONST_INT_P (disp)
16097 	       && INTVAL (disp) < -16*1024*1024)
16099 	  /* X32 runs in 64-bit mode, where displacement, DISP, in
16100 	     address DISP(%r64), is encoded as 32-bit immediate sign-
16101 	     extended from 32-bit to 64-bit.  For -0x40000300(%r64),
16102 	     address is %r64 + 0xffffffffbffffd00.  When %r64 <
16103 	     0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
16104 	     which is invalid for x32.  The correct address is %r64
16105 	     - 0x40000300 == 0xf7ffdd64.  To properly encode
16106 	     -0x40000300(%r64) for x32, we zero-extend negative
16107 	     displacement by forcing addr32 prefix which truncates
16108 	     0xfffffffff7ffdd64 to 0xf7ffdd64.  In theory, we should
16109 	     zero-extend all negative displacements, including -1(%rsp).
16110 	     However, for small negative displacements, sign-extension
16111 	     won't cause overflow.  We only zero-extend negative
16112 	     displacements if they < -16*1024*1024, which is also used
16113 	     to check legitimate address displacements for PIC.  */
/* AT&T form: disp(base,index,scale).  */
16117       if (ASSEMBLER_DIALECT == ASM_ATT)
16122 	    output_pic_addr_const (file, disp, 0);
16123 	  else if (GET_CODE (disp) == LABEL_REF)
16124 	    output_asm_label (disp);
16126 	    output_addr_const (file, disp);
16131 	    print_reg (base, code, file);
16135 	      print_reg (index, vsib ? 0 : code, file);
16136 	      if (scale != 1 || vsib)
16137 		fprintf (file, ",%d", scale);
/* Intel form: [base+index*scale+disp], symbol split from its offset.  */
16143 	  rtx offset = NULL_RTX;
16147 	      /* Pull out the offset of a symbol; print any symbol itself.  */
16148 	      if (GET_CODE (disp) == CONST
16149 		  && GET_CODE (XEXP (disp, 0)) == PLUS
16150 		  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16152 		  offset = XEXP (XEXP (disp, 0), 1);
16153 		  disp = gen_rtx_CONST (VOIDmode,
16154 					XEXP (XEXP (disp, 0), 0));
16158 		output_pic_addr_const (file, disp, 0);
16159 	      else if (GET_CODE (disp) == LABEL_REF)
16160 		output_asm_label (disp);
16161 	      else if (CONST_INT_P (disp))
16164 		output_addr_const (file, disp);
16170 	      print_reg (base, code, file);
16173 		  if (INTVAL (offset) >= 0)
16175 		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16179 		fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16186 	      print_reg (index, vsib ? 0 : code, file);
16187 	      if (scale != 1 || vsib)
16188 		fprintf (file, "*%d", scale);
16195 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
/* Print i386-specific UNSPEC address wrappers that generic
   output_addr_const cannot handle: TLS relocation suffixes
   (@gottpoff/@tpoff/@ntpoff/@dtpoff/@gotntpoff/@indntpoff), the Mach-O
   PIC function-base offset, and the split-stack TLS guard in %fs/%gs.
   NOTE(review): this listing is elided -- the embedded source line
   numbers jump, so return statements, `break`s and braces are missing
   from this excerpt.  */
16198 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16202 if (GET_CODE (x) != UNSPEC)
16205 op = XVECEXP (x, 0, 0);
16206 switch (XINT (x, 1))
16208 case UNSPEC_GOTTPOFF:
16209 output_addr_const (file, op);
16210 /* FIXME: This might be @TPOFF in Sun ld. */
16211 fputs ("@gottpoff", file);
16214 output_addr_const (file, op);
16215 fputs ("@tpoff", file);
16217 case UNSPEC_NTPOFF:
16218 output_addr_const (file, op);
/* One arm prints @tpoff, the other @ntpoff; the selecting condition
   is elided in this listing -- presumably 64-bit vs 32-bit (verify).  */
16220 fputs ("@tpoff", file);
16222 fputs ("@ntpoff", file);
16224 case UNSPEC_DTPOFF:
16225 output_addr_const (file, op);
16226 fputs ("@dtpoff", file);
16228 case UNSPEC_GOTNTPOFF:
16229 output_addr_const (file, op);
/* AT&T vs Intel dialect spellings of the RIP-relative @gottpoff form.  */
16231 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16232 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16234 fputs ("@gotntpoff", file);
16236 case UNSPEC_INDNTPOFF:
16237 output_addr_const (file, op);
16238 fputs ("@indntpoff", file);
16241 case UNSPEC_MACHOPIC_OFFSET:
16242 output_addr_const (file, op);
16244 machopic_output_function_base_name (file);
16248 case UNSPEC_STACK_CHECK:
/* Only meaningful under -fsplit-stack; the guard lives at a fixed
   TLS offset when the target provides one.  */
16252 gcc_assert (flag_split_stack);
16254 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16255 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16257 gcc_unreachable ();
16260 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16271 /* Split one or more double-mode RTL references into pairs of half-mode
16272 references. The RTL can be REG, offsettable MEM, integer constant, or
16273 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16274 split and "num" is its length. lo_half and hi_half are output arrays
16275 that parallel "operands". */
/* NOTE(review): excerpt is elided (embedded line numbers jump); the
   mode switch's case labels, loop header and braces are not shown.  */
16278 split_double_mode (machine_mode mode, rtx operands[],
16279 int num, rtx lo_half[], rtx hi_half[])
16281 machine_mode half_mode;
/* Half of a TImode value is DImode; half of a DImode value is SImode
   (the selecting case labels are elided in this listing).  */
16287 half_mode = DImode;
16290 half_mode = SImode;
16293 gcc_unreachable ();
/* Byte offset of the high half within the double-mode object.  */
16296 byte = GET_MODE_SIZE (half_mode);
16300 rtx op = operands[num];
16302 /* simplify_subreg refuse to split volatile memory addresses,
16303 but we still have to handle it. */
16306 lo_half[num] = adjust_address (op, half_mode, 0);
16307 hi_half[num] = adjust_address (op, half_mode, byte);
/* Non-MEM case: take subregs; VOIDmode constants use the requested
   MODE as the inner mode.  */
16311 lo_half[num] = simplify_gen_subreg (half_mode, op,
16312 GET_MODE (op) == VOIDmode
16313 ? mode : GET_MODE (op), 0);
16314 hi_half[num] = simplify_gen_subreg (half_mode, op,
16315 GET_MODE (op) == VOIDmode
16316 ? mode : GET_MODE (op), byte);
16321 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16322 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16323 is the expression of the binary operation. The output may either be
16324 emitted here, or returned to the caller, like all output_* functions.
16326 There is no guarantee that the operands are the same mode, as they
16327 might be within FLOAT or FLOAT_EXTEND expressions. */
16329 #ifndef SYSV386_COMPAT
16330 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16331 wants to fix the assemblers because that causes incompatibility
16332 with gcc. No-one wants to fix gcc because that causes
16333 incompatibility with assemblers... You can use the option of
16334 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
16335 #define SYSV386_COMPAT 1
/* NOTE(review): listing is elided throughout this function -- switch
   case labels, `break`s, `else`s, and the final buffer assembly are
   missing between the visible numbered lines.  */
16339 output_387_binary_op (rtx insn, rtx *operands)
16341 static char buf[40];
/* An SSE (scalar addss/subsd/...) form is used if any operand lives
   in an SSE register; otherwise x87 stack-register forms are chosen.  */
16344 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16346 #ifdef ENABLE_CHECKING
16347 /* Even if we do not want to check the inputs, this documents input
16348 constraints. Which helps in understanding the following code. */
16349 if (STACK_REG_P (operands[0])
16350 && ((REG_P (operands[1])
16351 && REGNO (operands[0]) == REGNO (operands[1])
16352 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16353 || (REG_P (operands[2])
16354 && REGNO (operands[0]) == REGNO (operands[2])
16355 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16356 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16359 gcc_assert (is_sse);
/* First switch: pick mnemonic roots per operation; an operand with
   integer mode selects the fiadd/fisub/... integer variants (the
   assignments themselves are elided here).  */
16362 switch (GET_CODE (operands[3]))
16365 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16366 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16374 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16375 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16383 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16384 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16392 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16393 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16401 gcc_unreachable ();
/* SSE path: SFmode gets the "ss" suffix, otherwise "sd"; the
   three-operand form appears to be the AVX variant (elided guard).  */
16408 strcpy (buf, ssep);
16409 if (GET_MODE (operands[0]) == SFmode)
16410 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16412 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16416 strcpy (buf, ssep + 1);
16417 if (GET_MODE (operands[0]) == SFmode)
16418 strcat (buf, "ss\t{%2, %0|%0, %2}");
16420 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: choose a suffix P depending on which operand is st(0),
   which operands die, and SYSV386_COMPAT's reversed fsubr/fdivr
   conventions.  */
16426 switch (GET_CODE (operands[3]))
16430 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16431 std::swap (operands[1], operands[2]);
16433 /* know operands[0] == operands[1]. */
16435 if (MEM_P (operands[2]))
16441 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16443 if (STACK_TOP_P (operands[0]))
16444 /* How is it that we are storing to a dead operand[2]?
16445 Well, presumably operands[1] is dead too. We can't
16446 store the result to st(0) as st(0) gets popped on this
16447 instruction. Instead store to operands[2] (which I
16448 think has to be st(1)). st(1) will be popped later.
16449 gcc <= 2.8.1 didn't have this check and generated
16450 assembly code that the Unixware assembler rejected. */
16451 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16453 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16457 if (STACK_TOP_P (operands[0]))
16458 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16460 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16465 if (MEM_P (operands[1]))
16471 if (MEM_P (operands[2]))
16477 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16480 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16481 derived assemblers, confusingly reverse the direction of
16482 the operation for fsub{r} and fdiv{r} when the
16483 destination register is not st(0). The Intel assembler
16484 doesn't have this brain damage. Read !SYSV386_COMPAT to
16485 figure out what the hardware really does. */
16486 if (STACK_TOP_P (operands[0]))
16487 p = "{p\t%0, %2|rp\t%2, %0}";
16489 p = "{rp\t%2, %0|p\t%0, %2}";
16491 if (STACK_TOP_P (operands[0]))
16492 /* As above for fmul/fadd, we can't store to st(0). */
16493 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16495 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16500 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16503 if (STACK_TOP_P (operands[0]))
16504 p = "{rp\t%0, %1|p\t%1, %0}";
16506 p = "{p\t%1, %0|rp\t%0, %1}";
16508 if (STACK_TOP_P (operands[0]))
16509 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16511 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16516 if (STACK_TOP_P (operands[0]))
16518 if (STACK_TOP_P (operands[1]))
16519 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16521 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16524 else if (STACK_TOP_P (operands[1]))
16527 p = "{\t%1, %0|r\t%0, %1}";
16529 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16535 p = "{r\t%2, %0|\t%0, %2}";
16537 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16543 gcc_unreachable ();
16550 /* Check if a 256bit AVX register is referenced inside of EXP. */
/* Look through a SUBREG wrapper, then report whether EXP is a REG
   whose mode is a 256-bit AVX mode (or OImode).  Used by the
   vzeroupper mode-switching machinery below.  */
16553 ix86_check_avx256_register (const_rtx exp)
16555 if (GET_CODE (exp) == SUBREG)
16556 exp = SUBREG_REG (exp);
16558 return (REG_P (exp)
16559 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
16562 /* Return needed mode for entity in optimize_mode_switching pass. */
/* AVX upper-128 state needed before INSN: DIRTY if a call passes a
   256-bit register argument or the insn references one; CLEAN for a
   call with no such arguments; ANY otherwise.
   NOTE(review): elided listing -- the CALL_P guard and some braces
   around the argument-scan loop are not shown here.  */
16565 ix86_avx_u128_mode_needed (rtx_insn *insn)
16571 /* Needed mode is set to AVX_U128_CLEAN if there are
16572 no 256bit modes used in function arguments. */
16573 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16575 link = XEXP (link, 1))
16577 if (GET_CODE (XEXP (link, 0)) == USE)
16579 rtx arg = XEXP (XEXP (link, 0), 0);
16581 if (ix86_check_avx256_register (arg))
16582 return AVX_U128_DIRTY;
16586 return AVX_U128_CLEAN;
16589 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16590 changes state only when a 256bit register is written to, but we need
16591 to prevent the compiler from moving optimal insertion point above
16592 eventual read from 256bit register. */
16593 subrtx_iterator::array_type array;
16594 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16595 if (ix86_check_avx256_register (*iter))
16596 return AVX_U128_DIRTY;
16598 return AVX_U128_ANY;
16601 /* Return mode that i387 must be switched into
16602 prior to the execution of insn. */
/* Map INSN to the x87 control-word mode it requires, as given by its
   i387_cw insn attribute (TRUNC, FLOOR, CEIL, MASK_PM), UNINITIALIZED
   after calls/asm, or ANY when it imposes no requirement.
   NOTE(review): elided listing -- the CALL_P test that starts the
   first condition and the per-mode `return mode;` bodies guarded by
   target flags are missing from this excerpt.  */
16605 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16607 enum attr_i387_cw mode;
16609 /* The mode UNINITIALIZED is used to store control word after a
16610 function call or ASM pattern. The mode ANY specify that function
16611 has no requirements on the control word and make no changes in the
16612 bits we are interested in. */
16615 || (NONJUMP_INSN_P (insn)
16616 && (asm_noperands (PATTERN (insn)) >= 0
16617 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16618 return I387_CW_UNINITIALIZED;
/* Unrecognizable insns cannot carry the attribute.  */
16620 if (recog_memoized (insn) < 0)
16621 return I387_CW_ANY;
16623 mode = get_attr_i387_cw (insn);
16628 if (mode == I387_CW_TRUNC)
16633 if (mode == I387_CW_FLOOR)
16638 if (mode == I387_CW_CEIL)
16643 if (mode == I387_CW_MASK_PM)
16648 gcc_unreachable ();
16651 return I387_CW_ANY;
16654 /* Return mode that entity must be switched into
16655 prior to the execution of insn. */
/* TARGET_MODE_NEEDED dispatcher: routes to the AVX_U128 or the i387
   control-word helper depending on ENTITY (case labels elided in
   this listing).  */
16658 ix86_mode_needed (int entity, rtx_insn *insn)
16663 return ix86_avx_u128_mode_needed (insn);
16668 return ix86_i387_mode_needed (entity, insn);
16670 gcc_unreachable ();
16675 /* Check if a 256bit AVX register is referenced in stores. */
/* note_stores callback: sets *(bool *)DATA when DEST is a 256-bit AVX
   register (assignment line elided in this listing).  */
16678 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16680 if (ix86_check_avx256_register (dest))
16682 bool *used = (bool *) data;
16687 /* Calculate mode of upper 128bit AVX registers after the insn. */
/* CLEAN after vzeroupper/vzeroall; after a call (guard elided), CLEAN
   unless a 256-bit register receives the return value; otherwise the
   incoming MODE is passed through unchanged.  */
16690 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16692 rtx pat = PATTERN (insn);
16694 if (vzeroupper_operation (pat, VOIDmode)
16695 || vzeroall_operation (pat, VOIDmode))
16696 return AVX_U128_CLEAN;
16698 /* We know that state is clean after CALL insn if there are no
16699 256bit registers used in the function return register. */
16702 bool avx_reg256_found = false;
16703 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16705 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16708 /* Otherwise, return current mode. Remember that if insn
16709 references AVX 256bit registers, the mode was already changed
16710 to DIRTY from MODE_NEEDED. */
16714 /* Return the mode that an insn results in. */
/* TARGET_MODE_AFTER dispatcher; the i387 entities presumably return
   MODE unchanged (those case arms are elided in this listing).  */
16717 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16722 return ix86_avx_u128_mode_after (mode, insn);
16729 gcc_unreachable ();
/* AVX upper-128 state assumed on function entry: DIRTY if any incoming
   argument arrives in a 256-bit AVX register, else CLEAN.  */
16734 ix86_avx_u128_mode_entry (void)
16738 /* Entry mode is set to AVX_U128_DIRTY if there are
16739 256bit modes used in function arguments. */
16740 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16741 arg = TREE_CHAIN (arg))
16743 rtx incoming = DECL_INCOMING_RTL (arg);
16745 if (incoming && ix86_check_avx256_register (incoming))
16746 return AVX_U128_DIRTY;
16749 return AVX_U128_CLEAN;
16752 /* Return a mode that ENTITY is assumed to be
16753 switched to at function entry. */
/* TARGET_MODE_ENTRY dispatcher (case labels elided): AVX_U128 entity
   asks the helper above; i387 control-word entities report ANY.  */
16756 ix86_mode_entry (int entity)
16761 return ix86_avx_u128_mode_entry ();
16766 return I387_CW_ANY;
16768 gcc_unreachable ();
/* AVX upper-128 state assumed on function exit: DIRTY if the return
   value is delivered in a 256-bit AVX register, else CLEAN.  */
16773 ix86_avx_u128_mode_exit (void)
16775 rtx reg = crtl->return_rtx;
16777 /* Exit mode is set to AVX_U128_DIRTY if there are
16778 256bit modes used in the function return register. */
16779 if (reg && ix86_check_avx256_register (reg))
16780 return AVX_U128_DIRTY;
16782 return AVX_U128_CLEAN;
16785 /* Return a mode that ENTITY is assumed to be
16786 switched to at function exit. */
/* TARGET_MODE_EXIT dispatcher (case labels elided), mirroring
   ix86_mode_entry above.  */
16789 ix86_mode_exit (int entity)
16794 return ix86_avx_u128_mode_exit ();
16799 return I387_CW_ANY;
16801 gcc_unreachable ();
16806 ix86_mode_priority (int, int n)
16811 /* Output code to initialize control word copies used by trunc?f?i and
16812 rounding patterns. CURRENT_MODE is set to current control word,
16813 while NEW_MODE is set to new control word. */
/* Store the current x87 control word (fnstcw) into a stack slot, derive
   a modified copy with the rounding/precision bits for MODE, and store
   that copy into a per-mode stack slot so fldcw can switch to it.
   NOTE(review): elided listing -- switch case labels, `break`s, and the
   branch structure between the two code paths are missing here.  */
16816 emit_i387_cw_initialization (int mode)
16818 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16821 enum ix86_stack_slot slot;
16823 rtx reg = gen_reg_rtx (HImode);
/* Capture the live control word and copy it into a pseudo to edit.  */
16825 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16826 emit_move_insn (reg, copy_rtx (stored_mode));
/* First variant: plain and/or of the HImode copy -- used on 64-bit,
   partial-reg-stall targets, or when optimizing for size.  */
16828 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16829 || optimize_insn_for_size_p ())
16833 case I387_CW_TRUNC:
16834 /* round toward zero (truncate) */
16835 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16836 slot = SLOT_CW_TRUNC;
16839 case I387_CW_FLOOR:
16840 /* round down toward -oo */
16841 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16842 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16843 slot = SLOT_CW_FLOOR;
16847 /* round up toward +oo */
16848 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16849 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16850 slot = SLOT_CW_CEIL;
16853 case I387_CW_MASK_PM:
16854 /* mask precision exception for nearbyint() */
16855 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16856 slot = SLOT_CW_MASK_PM;
16860 gcc_unreachable ();
/* Second variant: insert the 2-bit rounding field directly via
   movsi_insv_1 (avoids the HImode operations above).  */
16867 case I387_CW_TRUNC:
16868 /* round toward zero (truncate) */
16869 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16870 slot = SLOT_CW_TRUNC;
16873 case I387_CW_FLOOR:
16874 /* round down toward -oo */
16875 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16876 slot = SLOT_CW_FLOOR;
16880 /* round up toward +oo */
16881 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16882 slot = SLOT_CW_CEIL;
16885 case I387_CW_MASK_PM:
16886 /* mask precision exception for nearbyint() */
16887 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16888 slot = SLOT_CW_MASK_PM;
16892 gcc_unreachable ();
16896 gcc_assert (slot < MAX_386_STACK_LOCALS);
/* Publish the edited control word into its dedicated slot.  */
16898 new_mode = assign_386_stack_local (HImode, slot);
16899 emit_move_insn (new_mode, reg);
16902 /* Emit vzeroupper. */
/* Emit a vzeroupper, unless a live call-saved SSE register would have
   its upper bits destroyed at this point (the early-return bodies of
   the scans are elided in this listing).  */
16905 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16909 /* Cancel automatic vzeroupper insertion if there are
16910 live call-saved SSE registers at the insertion point. */
16912 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16913 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16917 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16918 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16921 emit_insn (gen_avx_vzeroupper ());
16924 /* Generate one or more insns to set ENTITY to MODE. */
16926 /* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
16927 is the set of hard registers live at the point where the insn(s)
16928 are to be inserted. */
/* TARGET_MODE_EMIT dispatcher (case labels elided): vzeroupper for a
   CLEAN AVX_U128 transition, fldcw setup for i387 rounding modes.  */
16931 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16932 HARD_REG_SET regs_live)
16937 if (mode == AVX_U128_CLEAN)
16938 ix86_avx_emit_vzeroupper (regs_live);
16944 if (mode != I387_CW_ANY
16945 && mode != I387_CW_UNINITIALIZED)
16946 emit_i387_cw_initialization (mode);
16949 gcc_unreachable ();
16953 /* Output code for INSN to convert a float to a signed int. OPERANDS
16954 are the insn operands. The output may be [HSD]Imode and the input
16955 operand may be [SDX]Fmode. */
/* NOTE(review): elided listing -- the fisttp/fldcw branch structure
   and the final return are missing between the visible lines.  */
16958 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
16960 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16961 int dimode_p = GET_MODE (operands[0]) == DImode;
16962 int round_mode = get_attr_i387_cw (insn);
16964 /* Jump through a hoop or two for DImode, since the hardware has no
16965 non-popping instruction. We used to do this a different way, but
16966 that was somewhat fragile and broke with post-reload splitters. */
/* Duplicate st(0) so the mandatory popping store doesn't lose it.  */
16967 if ((dimode_p || fisttp) && !stack_top_dies)
16968 output_asm_insn ("fld\t%y1", operands);
16970 gcc_assert (STACK_TOP_P (operands[1]));
16971 gcc_assert (MEM_P (operands[0]));
16972 gcc_assert (GET_MODE (operands[1]) != TFmode);
/* SSE3 fisttp always truncates, so no control-word dance is needed;
   otherwise bracket the store with fldcw to/from the rounding slot.  */
16975 output_asm_insn ("fisttp%Z0\t%0", operands);
16978 if (round_mode != I387_CW_ANY)
16979 output_asm_insn ("fldcw\t%3", operands);
16980 if (stack_top_dies || dimode_p)
16981 output_asm_insn ("fistp%Z0\t%0", operands);
16983 output_asm_insn ("fist%Z0\t%0", operands);
16984 if (round_mode != I387_CW_ANY)
16985 output_asm_insn ("fldcw\t%2", operands);
16991 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16992 have the values zero or one, indicates the ffreep insn's operand
16993 from the OPERANDS array. */
16995 static const char *
16996 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16998 if (TARGET_USE_FFREEP)
16999 #ifdef HAVE_AS_IX86_FFREEP
17000 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler lacks ffreep: emit the raw opcode bytes (0xdf 0xc0+i)
   via ASM_SHORT.  Note: static buffer, not reentrant -- same idiom
   as other output_* helpers in this file.  */
17003 static char retval[32];
17004 int regno = REGNO (operands[opno]);
17006 gcc_assert (STACK_REGNO_P (regno));
17008 regno -= FIRST_STACK_REG;
17010 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
/* Fallback when ffreep is not profitable on this target.  */
17015 return opno ? "fstp\t%y1" : "fstp\t%y0";
17019 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
17020 should be used. UNORDERED_P is true when fucom should be used. */
/* NOTE(review): elided listing -- branch guards, `else`s and the final
   table lookup/return are missing between the visible lines.  */
17023 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
17025 int stack_top_dies;
17026 rtx cmp_op0, cmp_op1;
17027 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
/* Operand layout differs between the eflags (fcomi-style) and the
   fnstsw-style patterns (selecting condition elided).  */
17031 cmp_op0 = operands[0];
17032 cmp_op1 = operands[1];
17036 cmp_op0 = operands[1];
17037 cmp_op1 = operands[2];
/* SSE comparisons: [v]ucomiss/[v]comiss for SFmode,
   [v]ucomisd/[v]comisd otherwise.  */
17042 if (GET_MODE (operands[0]) == SFmode)
17044 return "%vucomiss\t{%1, %0|%0, %1}";
17046 return "%vcomiss\t{%1, %0|%0, %1}";
17049 return "%vucomisd\t{%1, %0|%0, %1}";
17051 return "%vcomisd\t{%1, %0|%0, %1}";
17054 gcc_assert (STACK_TOP_P (cmp_op0));
17056 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero: ftst, with an extra pop if st(0) dies.  */
17058 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
17060 if (stack_top_dies)
17062 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
17063 return output_387_ffreep (operands, 1);
17066 return "ftst\n\tfnstsw\t%0";
17069 if (STACK_REG_P (cmp_op1)
17071 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
17072 && REGNO (cmp_op1) != FIRST_STACK_REG)
17074 /* If both the top of the 387 stack dies, and the other operand
17075 is also a stack register that dies, then this must be a
17076 `fcompp' float compare */
17080 /* There is no double popping fcomi variant. Fortunately,
17081 eflags is immune from the fstp's cc clobbering. */
17083 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
17085 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
17086 return output_387_ffreep (operands, 0);
17091 return "fucompp\n\tfnstsw\t%0";
17093 return "fcompp\n\tfnstsw\t%0";
17098 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
17100 static const char * const alt[16] =
17102 "fcom%Z2\t%y2\n\tfnstsw\t%0",
17103 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
17104 "fucom%Z2\t%y2\n\tfnstsw\t%0",
17105 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
17107 "ficom%Z2\t%y2\n\tfnstsw\t%0",
17108 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
17112 "fcomi\t{%y1, %0|%0, %y1}",
17113 "fcomip\t{%y1, %0|%0, %y1}",
17114 "fucomi\t{%y1, %0|%0, %y1}",
17115 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index matching the encoding documented above.  */
17126 mask = eflags_p << 3;
17127 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17128 mask |= unordered_p << 1;
17129 mask |= stack_top_dies;
17131 gcc_assert (mask < 16);
/* Emit one absolute jump-table element: .long (or .quad for 64-bit
   case vectors -- selecting guard elided) referencing local label
   LPREFIX<value>.  */
17140 ix86_output_addr_vec_elt (FILE *file, int value)
17142 const char *directive = ASM_LONG;
17146 directive = ASM_QUAD;
17148 gcc_assert (!TARGET_64BIT);
17151 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one PIC-relative jump-table element as a difference of labels,
   a @GOTOFF reference, a Mach-O function-base difference, or a
   GOT-relative expression, depending on target configuration.
   NOTE(review): elided listing -- some branch guards and closing
   punctuation are missing between the visible lines.  */
17155 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17157 const char *directive = ASM_LONG;
17160 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17161 directive = ASM_QUAD;
17163 gcc_assert (!TARGET_64BIT);
17165 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
17166 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17167 fprintf (file, "%s%s%d-%s%d\n",
17168 directive, LPREFIX, value, LPREFIX, rel);
17169 else if (HAVE_AS_GOTOFF_IN_DATA)
17170 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17172 else if (TARGET_MACHO)
17174 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17175 machopic_output_function_base_name (file);
/* Fallback: express the entry relative to the GOT symbol.  */
17180 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17181 GOT_SYMBOL_NAME, LPREFIX, value);
17184 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* Zero DEST post-reload.  The xor form clobbers the flags, so it is
   wrapped in a PARALLEL with a FLAGS_REG clobber; the emit of TMP is
   elided in this listing.  */
17188 ix86_expand_clear (rtx dest)
17192 /* We play register width games, which are only valid after reload. */
17193 gcc_assert (reload_completed);
17195 /* Avoid HImode and its attendant prefix byte. */
17196 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17197 dest = gen_rtx_REG (SImode, REGNO (dest));
17198 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
/* Prefer xor (smaller) unless mov $0 is requested and we're not
   optimizing for size.  */
17200 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
17202 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17203 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17209 /* X is an unchanging MEM. If it is a constant pool reference, return
17210 the constant pool rtx, else NULL. */
/* Delegitimize the address first so PIC-wrapped pool references are
   recognized too; the NULL return is elided in this listing.  */
17213 maybe_get_pool_constant (rtx x)
17215 x = ix86_delegitimize_address (XEXP (x, 0));
17217 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17218 return get_pool_constant (x);
/* Expand a scalar move of MODE from operands[1] to operands[0],
   legitimizing TLS symbols, PE/COFF dllimport symbols, PIC references
   and awkward constants along the way, then emitting the final SET.
   NOTE(review): heavily elided listing -- operand extraction, many
   `else`s, braces and intermediate conditions are missing between the
   visible numbered lines.  */
17224 ix86_expand_move (machine_mode mode, rtx operands[])
17227 enum tls_model model;
/* Bare SYMBOL_REF source: handle TLS models and dllimport first.  */
17232 if (GET_CODE (op1) == SYMBOL_REF)
17236 model = SYMBOL_REF_TLS_MODEL (op1);
17239 op1 = legitimize_tls_address (op1, model, true);
17240 op1 = force_operand (op1, op0);
17243 op1 = convert_to_mode (mode, op1, 1);
17245 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
/* (SYMBOL_REF + const) source: legitimize the symbol part, then
   re-add the constant addend.  */
17248 else if (GET_CODE (op1) == CONST
17249 && GET_CODE (XEXP (op1, 0)) == PLUS
17250 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17252 rtx addend = XEXP (XEXP (op1, 0), 1);
17253 rtx symbol = XEXP (XEXP (op1, 0), 0);
17256 model = SYMBOL_REF_TLS_MODEL (symbol);
17258 tmp = legitimize_tls_address (symbol, model, true);
17260 tmp = legitimize_pe_coff_symbol (symbol, true);
17264 tmp = force_operand (tmp, NULL);
17265 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17266 op0, 1, OPTAB_DIRECT);
17269 op1 = convert_to_mode (mode, tmp, 1);
/* PIC / Mach-O indirection for symbolic operands.  */
17273 if ((flag_pic || MACHOPIC_INDIRECT)
17274 && symbolic_operand (op1, mode))
17276 if (TARGET_MACHO && !TARGET_64BIT)
17279 /* dynamic-no-pic */
17280 if (MACHOPIC_INDIRECT)
17282 rtx temp = ((reload_in_progress
17283 || ((op0 && REG_P (op0))
17285 ? op0 : gen_reg_rtx (Pmode));
17286 op1 = machopic_indirect_data_reference (op1, temp);
17288 op1 = machopic_legitimize_pic_address (op1, mode,
17289 temp == op1 ? 0 : temp);
17291 if (op0 != op1 && GET_CODE (op0) != MEM)
17293 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
17297 if (GET_CODE (op0) == MEM)
17298 op1 = force_reg (Pmode, op1);
17302 if (GET_CODE (temp) != REG)
17303 temp = gen_reg_rtx (Pmode);
17304 temp = legitimize_pic_address (op1, temp);
17309 /* dynamic-no-pic */
17315 op1 = force_reg (mode, op1);
17316 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17318 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17319 op1 = legitimize_pic_address (op1, reg);
17322 op1 = convert_to_mode (mode, op1, 1);
/* MEM destination: the source generally must be in a register
   (x86 has no mem-to-mem moves), except for valid pushes.  */
17329 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17330 || !push_operand (op0, mode))
17332 op1 = force_reg (mode, op1);
17334 if (push_operand (op0, mode)
17335 && ! general_no_elim_operand (op1, mode))
17336 op1 = copy_to_mode_reg (mode, op1);
17338 /* Force large constants in 64bit compilation into register
17339 to get them CSEed. */
17340 if (can_create_pseudo_p ()
17341 && (mode == DImode) && TARGET_64BIT
17342 && immediate_operand (op1, mode)
17343 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17344 && !register_operand (op0, mode)
17346 op1 = copy_to_mode_reg (mode, op1);
17348 if (can_create_pseudo_p ()
17349 && FLOAT_MODE_P (mode)
17350 && GET_CODE (op1) == CONST_DOUBLE)
17352 /* If we are loading a floating point constant to a register,
17353 force the value to memory now, since we'll get better code
17354 out the back end. */
17356 op1 = validize_mem (force_const_mem (mode, op1));
17357 if (!register_operand (op0, mode))
17359 rtx temp = gen_reg_rtx (mode);
17360 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
17361 emit_move_insn (op0, temp);
17367 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector move: resolve pushes, force non-trivial constants to
   the constant pool, and route misaligned SSE operands through
   ix86_expand_vector_move_misalign.  NOTE(review): elided listing --
   the early return after the misalign call and some braces are missing.  */
17371 ix86_expand_vector_move (machine_mode mode, rtx operands[])
17373 rtx op0 = operands[0], op1 = operands[1];
17374 unsigned int align = GET_MODE_ALIGNMENT (mode);
17376 if (push_operand (op0, VOIDmode))
17377 op0 = emit_move_resolve_push (mode, op0);
17379 /* Force constants other than zero into memory. We do not know how
17380 the instructions used to build constants modify the upper 64 bits
17381 of the register, once we have that information we may be able
17382 to handle some of them more efficiently. */
17383 if (can_create_pseudo_p ()
17384 && register_operand (op0, mode)
17385 && (CONSTANT_P (op1)
17386 || (GET_CODE (op1) == SUBREG
17387 && CONSTANT_P (SUBREG_REG (op1))))
17388 && !standard_sse_constant_p (op1))
17389 op1 = validize_mem (force_const_mem (mode, op1));
17391 /* We need to check memory alignment for SSE mode since attribute
17392 can make operands unaligned. */
17393 if (can_create_pseudo_p ()
17394 && SSE_REG_MODE_P (mode)
17395 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17396 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17400 /* ix86_expand_vector_move_misalign() does not like constants ... */
17401 if (CONSTANT_P (op1)
17402 || (GET_CODE (op1) == SUBREG
17403 && CONSTANT_P (SUBREG_REG (op1))))
17404 op1 = validize_mem (force_const_mem (mode, op1));
17406 /* ... nor both arguments in memory. */
17407 if (!register_operand (op0, mode)
17408 && !register_operand (op1, mode))
17409 op1 = force_reg (mode, op1);
17411 tmp[0] = op0; tmp[1] = op1;
17412 ix86_expand_vector_move_misalign (mode, tmp);
17416 /* Make operand1 a register if it isn't already. */
17417 if (can_create_pseudo_p ()
17418 && !register_operand (op0, mode)
17419 && !register_operand (op1, mode))
17421 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17425 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17428 /* Split 32-byte AVX unaligned load and store if needed. */
/* Depending on tuning flags, either split a misaligned 256-bit move
   into two 128-bit halves (load via VEC_CONCAT, store via
   vextractf128) or emit the single unaligned load/store pattern.
   NOTE(review): elided listing -- case labels, `break`s and the
   MEM_P (op1) guard on the load path are missing here.  */
17431 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17434 rtx (*extract) (rtx, rtx, rtx);
17435 rtx (*load_unaligned) (rtx, rtx);
17436 rtx (*store_unaligned) (rtx, rtx);
/* Select the per-mode expanders; the half-mode assigned to `mode`
   in each case is elided in this listing.  */
17439 switch (GET_MODE (op0))
17442 gcc_unreachable ();
17444 extract = gen_avx_vextractf128v32qi;
17445 load_unaligned = gen_avx_loaddquv32qi;
17446 store_unaligned = gen_avx_storedquv32qi;
17450 extract = gen_avx_vextractf128v8sf;
17451 load_unaligned = gen_avx_loadups256;
17452 store_unaligned = gen_avx_storeups256;
17456 extract = gen_avx_vextractf128v4df;
17457 load_unaligned = gen_avx_loadupd256;
17458 store_unaligned = gen_avx_storeupd256;
17465 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
17467 rtx r = gen_reg_rtx (mode);
/* Two 16-byte loads concatenated into the 256-bit destination.  */
17468 m = adjust_address (op1, mode, 0);
17469 emit_move_insn (r, m);
17470 m = adjust_address (op1, mode, 16);
17471 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17472 emit_move_insn (op0, r);
17474 /* Normal *mov<mode>_internal pattern will handle
17475 unaligned loads just fine if misaligned_operand
17476 is true, and without the UNSPEC it can be combined
17477 with arithmetic instructions. */
17478 else if (misaligned_operand (op1, GET_MODE (op1)))
17479 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17481 emit_insn (load_unaligned (op0, op1));
17483 else if (MEM_P (op0))
17485 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
/* Store each 128-bit lane separately with vextractf128.  */
17487 m = adjust_address (op0, mode, 0);
17488 emit_insn (extract (m, op1, const0_rtx));
17489 m = adjust_address (op0, mode, 16);
17490 emit_insn (extract (m, op1, const1_rtx));
17493 emit_insn (store_unaligned (op0, op1));
17496 gcc_unreachable ();
17499 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17500 straight to ix86_expand_vector_move. */
17501 /* Code generation for scalar reg-reg moves of single and double precision data:
17502 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17506 if (x86_sse_partial_reg_dependency == true)
17511 Code generation for scalar loads of double precision data:
17512 if (x86_sse_split_regs == true)
17513 movlpd mem, reg (gas syntax)
17517 Code generation for unaligned packed loads of single precision data
17518 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17519 if (x86_sse_unaligned_move_optimal)
17522 if (x86_sse_partial_reg_dependency == true)
17534 Code generation for unaligned packed loads of double precision data
17535 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17536 if (x86_sse_unaligned_move_optimal)
17539 if (x86_sse_split_regs == true)
/* Expand a misaligned vector load or store OPERANDS[0] = OPERANDS[1] in
   MODE.  Dispatches on vector size (64-byte AVX-512, 32-byte AVX, 16-byte
   SSE) and on whether the memory operand is the source (load) or the
   destination (store), choosing unaligned move patterns or split
   half-register sequences according to target tuning flags.
   NOTE(review): several lines of this function are not visible in this
   chunk; comments describe only the visible code.  */
17552 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17554 rtx op0, op1, orig_op0 = NULL_RTX, m;
17555 rtx (*load_unaligned) (rtx, rtx);
17556 rtx (*store_unaligned) (rtx, rtx);
/* 64-byte vectors: use AVX-512 unaligned load/store expanders.  */
17561 if (GET_MODE_SIZE (mode) == 64)
17563 switch (GET_MODE_CLASS (mode))
17565 case MODE_VECTOR_INT:
/* Integer 512-bit vectors are canonicalized to V16SImode first.  */
17567 if (GET_MODE (op0) != V16SImode)
17572 op0 = gen_reg_rtx (V16SImode);
17575 op0 = gen_lowpart (V16SImode, op0);
17577 op1 = gen_lowpart (V16SImode, op1);
17580 case MODE_VECTOR_FLOAT:
17581 switch (GET_MODE (op0))
17584 gcc_unreachable ();
17586 load_unaligned = gen_avx512f_loaddquv16si;
17587 store_unaligned = gen_avx512f_storedquv16si;
17590 load_unaligned = gen_avx512f_loadups512;
17591 store_unaligned = gen_avx512f_storeups512;
17594 load_unaligned = gen_avx512f_loadupd512;
17595 store_unaligned = gen_avx512f_storeupd512;
17600 emit_insn (load_unaligned (op0, op1));
17601 else if (MEM_P (op0))
17602 emit_insn (store_unaligned (op0, op1));
17604 gcc_unreachable ();
17606 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17610 gcc_unreachable ();
/* 32-byte vectors: canonicalize integer vectors to V32QImode, or split
   256-bit float moves into 128-bit halves.  */
17617 && GET_MODE_SIZE (mode) == 32)
17619 switch (GET_MODE_CLASS (mode))
17621 case MODE_VECTOR_INT:
17623 if (GET_MODE (op0) != V32QImode)
17628 op0 = gen_reg_rtx (V32QImode);
17631 op0 = gen_lowpart (V32QImode, op0);
17633 op1 = gen_lowpart (V32QImode, op1);
17636 case MODE_VECTOR_FLOAT:
17637 ix86_avx256_split_vector_move_misalign (op0, op1);
17639 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17643 gcc_unreachable ();
17651 /* Normal *mov<mode>_internal pattern will handle
17652 unaligned loads just fine if misaligned_operand
17653 is true, and without the UNSPEC it can be combined
17654 with arithmetic instructions. */
17656 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17657 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17658 && misaligned_operand (op1, GET_MODE (op1)))
17659 emit_insn (gen_rtx_SET (VOIDmode, op0, op1))
17660 /* ??? If we have typed data, then it would appear that using
17661 movdqu is the only way to get unaligned data loaded with
17663 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17665 if (GET_MODE (op0) != V16QImode)
17668 op0 = gen_reg_rtx (V16QImode);
17670 op1 = gen_lowpart (V16QImode, op1);
17671 /* We will eventually emit movups based on insn attributes. */
17672 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17674 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
/* SSE2 V2DF unaligned load: movupd when unaligned moves are cheap or we
   optimize for size, otherwise a split loadlpd/loadhpd sequence.  */
17676 else if (TARGET_SSE2 && mode == V2DFmode)
17681 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17682 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17683 || optimize_insn_for_size_p ())
17685 /* We will eventually emit movups based on insn attributes. */
17686 emit_insn (gen_sse2_loadupd (op0, op1));
17690 /* When SSE registers are split into halves, we can avoid
17691 writing to the top half twice. */
17692 if (TARGET_SSE_SPLIT_REGS)
17694 emit_clobber (op0);
17699 /* ??? Not sure about the best option for the Intel chips.
17700 The following would seem to satisfy; the register is
17701 entirely cleared, breaking the dependency chain. We
17702 then store to the upper half, with a dependency depth
17703 of one. A rumor has it that Intel recommends two movsd
17704 followed by an unpacklpd, but this is unconfirmed. And
17705 given that the dependency depth of the unpacklpd would
17706 still be one, I'm not sure why this would be better. */
17707 zero = CONST0_RTX (V2DFmode);
17710 m = adjust_address (op1, DFmode, 0);
17711 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17712 m = adjust_address (op1, DFmode, 8);
17713 emit_insn (gen_sse2_loadhpd (op0, op0, m));
/* Remaining load case (presumably SSE V4SF — confirm against full
   source): movups when cheap, else split loadlps/loadhps.  */
17720 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17721 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17722 || optimize_insn_for_size_p ())
17724 if (GET_MODE (op0) != V4SFmode)
17727 op0 = gen_reg_rtx (V4SFmode);
17729 op1 = gen_lowpart (V4SFmode, op1);
17730 emit_insn (gen_sse_loadups (op0, op1));
17732 emit_move_insn (orig_op0,
17733 gen_lowpart (GET_MODE (orig_op0), op0));
17737 if (mode != V4SFmode)
17738 t = gen_reg_rtx (V4SFmode);
/* Clear T first to break the false dependency on the old value when
   the target suffers partial SSE register stalls.  */
17742 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17743 emit_move_insn (t, CONST0_RTX (V4SFmode));
17747 m = adjust_address (op1, V2SFmode, 0);
17748 emit_insn (gen_sse_loadlps (t, t, m));
17749 m = adjust_address (op1, V2SFmode, 8);
17750 emit_insn (gen_sse_loadhps (t, t, m));
17751 if (mode != V4SFmode)
17752 emit_move_insn (op0, gen_lowpart (mode, t));
/* Store side: op0 is the memory destination.  */
17755 else if (MEM_P (op0))
17757 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17759 op0 = gen_lowpart (V16QImode, op0);
17760 op1 = gen_lowpart (V16QImode, op1);
17761 /* We will eventually emit movups based on insn attributes. */
17762 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17764 else if (TARGET_SSE2 && mode == V2DFmode)
17767 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17768 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17769 || optimize_insn_for_size_p ())
17770 /* We will eventually emit movups based on insn attributes. */
17771 emit_insn (gen_sse2_storeupd (op0, op1));
17774 m = adjust_address (op0, DFmode, 0);
17775 emit_insn (gen_sse2_storelpd (m, op1));
17776 m = adjust_address (op0, DFmode, 8);
17777 emit_insn (gen_sse2_storehpd (m, op1));
17782 if (mode != V4SFmode)
17783 op1 = gen_lowpart (V4SFmode, op1);
17786 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17787 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17788 || optimize_insn_for_size_p ())
17790 op0 = gen_lowpart (V4SFmode, op0);
17791 emit_insn (gen_sse_storeups (op0, op1));
17795 m = adjust_address (op0, V2SFmode, 0);
17796 emit_insn (gen_sse_storelps (m, op1));
17797 m = adjust_address (op0, V2SFmode, 8);
17798 emit_insn (gen_sse_storehps (m, op1));
17803 gcc_unreachable ();
17806 /* Helper function of ix86_fixup_binary_operands to canonicalize
17807 operand order. Returns true if the operands should be swapped. */
/* Decide whether OPERANDS[1] and OPERANDS[2] of commutative CODE in MODE
   should be swapped to satisfy x86 two-address constraints.  Only
   RTX_COMM_ARITH codes may be swapped.  */
17810 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
17813 rtx dst = operands[0];
17814 rtx src1 = operands[1];
17815 rtx src2 = operands[2];
17817 /* If the operation is not commutative, we can't do anything. */
17818 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17821 /* Highest priority is that src1 should match dst. */
17822 if (rtx_equal_p (dst, src1))
17824 if (rtx_equal_p (dst, src2))
17827 /* Next highest priority is that immediate constants come second. */
17828 if (immediate_operand (src2, mode))
17830 if (immediate_operand (src1, mode))
17833 /* Lowest priority is that memory references should come second. */
17843 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17844 destination to use for the operation. If different from the true
17845 destination in operands[0], a copy operation will be required. */
/* Canonicalize OPERANDS[1..2] of binary CODE in MODE so the resulting insn
   satisfies ix86_binary_operator_ok: at most one memory source, no constant
   first source, and src1 matching a memory destination.  Updates
   operands[1]/operands[2] in place; the returned rtx is the destination to
   use (a fresh pseudo if operands[0] could not be used directly).  */
17848 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
17851 rtx dst = operands[0];
17852 rtx src1 = operands[1];
17853 rtx src2 = operands[2];
17855 /* Canonicalize operand order. */
17856 if (ix86_swap_binary_operands_p (code, mode, operands))
17858 /* It is invalid to swap operands of different modes. */
17859 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17861 std::swap (src1, src2);
17864 /* Both source operands cannot be in memory. */
17865 if (MEM_P (src1) && MEM_P (src2))
17867 /* Optimization: Only read from memory once. */
17868 if (rtx_equal_p (src1, src2))
17870 src2 = force_reg (mode, src2);
17873 else if (rtx_equal_p (dst, src1))
17874 src2 = force_reg (mode, src2);
17876 src1 = force_reg (mode, src1);
17879 /* If the destination is memory, and we do not have matching source
17880 operands, do things in registers. */
17881 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17882 dst = gen_reg_rtx (mode);
17884 /* Source 1 cannot be a constant. */
17885 if (CONSTANT_P (src1))
17886 src1 = force_reg (mode, src1);
17888 /* Source 1 cannot be a non-matching memory. */
17889 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17890 src1 = force_reg (mode, src1);
17892 /* Improve address combine. */
17894 && GET_MODE_CLASS (mode) == MODE_INT
17896 src2 = force_reg (mode, src2);
17898 operands[1] = src1;
17899 operands[2] = src2;
17903 /* Similarly, but assume that the destination has already been
17904 set up properly. */
/* Like ix86_fixup_binary_operands, but assert that no copy to a new
   destination was required: the fixed-up destination must be operands[0].  */
17907 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17908 machine_mode mode, rtx operands[])
17910 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17911 gcc_assert (dst == operands[0]);
17914 /* Attempt to expand a binary operator. Make the expansion closer to the
17915 actual machine, then just general_operand, which will allow 3 separate
17916 memory references (one output, two input) in a single insn. */
/* Expand binary operator CODE in MODE over OPERANDS, emitting the insn
   with an attached clobber of the flags register (except for the PLUS
   special cases below), then copying to operands[0] if a temporary
   destination was needed.  */
17919 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
17922 rtx src1, src2, dst, op, clob;
17924 dst = ix86_fixup_binary_operands (code, mode, operands);
17925 src1 = operands[1];
17926 src2 = operands[2];
17928 /* Emit the instruction. */
17930 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17931 if (reload_in_progress)
17933 /* Reload doesn't know about the flags register, and doesn't know that
17934 it doesn't want to clobber it. We can only do this with PLUS. */
17935 gcc_assert (code == PLUS);
17938 else if (reload_completed
17940 && !rtx_equal_p (dst, src1))
17942 /* This is going to be an LEA; avoid splitting it later. */
/* Normal case: SET plus an explicit clobber of FLAGS_REG in a PARALLEL.  */
17947 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17948 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17951 /* Fix up the destination if needed. */
17952 if (dst != operands[0])
17953 emit_move_insn (operands[0], dst);
17956 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17957 the given OPERANDS. */
/* Expand vector logical operator CODE (AND, IOR, XOR) in MODE.  If both
   operands are really float vectors viewed through integer SUBREGs (or a
   CONST_VECTOR), perform the operation directly in the float vector mode
   so that andps/orps/xorps-style insns can be used; otherwise fall through
   to the generic integer-vector expansion at the bottom.  */
17960 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
17963 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17964 if (GET_CODE (operands[1]) == SUBREG)
17969 else if (GET_CODE (operands[2]) == SUBREG)
17974 /* Optimize (__m128i) d | (__m128i) e and similar code
17975 when d and e are float vectors into float vector logical
17976 insn. In C/C++ without using intrinsics there is no other way
17977 to express vector logical operation on float vectors than
17978 to cast them temporarily to integer vectors. */
17980 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17981 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17982 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17983 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17984 && SUBREG_BYTE (op1) == 0
17985 && (GET_CODE (op2) == CONST_VECTOR
17986 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17987 && SUBREG_BYTE (op2) == 0))
17988 && can_create_pseudo_p ())
17991 switch (GET_MODE (SUBREG_REG (op1)))
/* Perform the operation in the float vector mode of the SUBREG_REGs.  */
17999 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
18000 if (GET_CODE (op2) == CONST_VECTOR)
18002 op2 = gen_lowpart (GET_MODE (dst), op2);
18003 op2 = force_reg (GET_MODE (dst), op2);
18008 op2 = SUBREG_REG (operands[2]);
18009 if (!nonimmediate_operand (op2, GET_MODE (dst)))
18010 op2 = force_reg (GET_MODE (dst), op2);
18012 op1 = SUBREG_REG (op1);
18013 if (!nonimmediate_operand (op1, GET_MODE (dst)))
18014 op1 = force_reg (GET_MODE (dst), op1);
18015 emit_insn (gen_rtx_SET (VOIDmode, dst,
18016 gen_rtx_fmt_ee (code, GET_MODE (dst),
18018 emit_move_insn (operands[0], gen_lowpart (mode, dst));
/* Generic path: force operands to nonimmediate form and emit in MODE.  */
18024 if (!nonimmediate_operand (operands[1], mode))
18025 operands[1] = force_reg (mode, operands[1]);
18026 if (!nonimmediate_operand (operands[2], mode))
18027 operands[2] = force_reg (mode, operands[2]);
18028 ix86_fixup_binary_operands_no_copy (code, mode, operands);
18029 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18030 gen_rtx_fmt_ee (code, mode, operands[1],
18034 /* Return TRUE or FALSE depending on whether the binary operator meets the
18035 appropriate constraints. */
/* Predicate used by insn conditions: return true if OPERANDS of binary
   CODE in MODE already satisfy x86 two-address constraints (at most one
   memory source, matching memory destination, no constant src1).  */
18038 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
18041 rtx dst = operands[0];
18042 rtx src1 = operands[1];
18043 rtx src2 = operands[2];
18045 /* Both source operands cannot be in memory. */
18046 if (MEM_P (src1) && MEM_P (src2))
18049 /* Canonicalize operand order for commutative operators. */
18050 if (ix86_swap_binary_operands_p (code, mode, operands))
18051 std::swap (src1, src2);
18053 /* If the destination is memory, we must have a matching source operand. */
18054 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18057 /* Source 1 cannot be a constant. */
18058 if (CONSTANT_P (src1))
18061 /* Source 1 cannot be a non-matching memory. */
18062 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18063 /* Support "andhi/andsi/anddi" as a zero-extending move. */
18064 return (code == AND
18067 || (TARGET_64BIT && mode == DImode))
18068 && satisfies_constraint_L (src2));
18073 /* Attempt to expand a unary operator. Make the expansion closer to the
18074 actual machine, then just general_operand, which will allow 2 separate
18075 memory references (one output, one input) in a single insn. */
/* Expand unary operator CODE in MODE over OPERANDS.  Mirrors
   ix86_expand_binary_operator: fixes up memory operands, emits the SET
   (with a FLAGS_REG clobber except for NOT, which does not touch flags),
   then copies back to operands[0] if a temporary was used.  */
18078 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
18081 bool matching_memory = false;
18082 rtx src, dst, op, clob;
18087 /* If the destination is memory, and we do not have matching source
18088 operands, do things in registers. */
18091 if (rtx_equal_p (dst, src))
18092 matching_memory = true;
18094 dst = gen_reg_rtx (mode);
18097 /* When source operand is memory, destination must match. */
18098 if (MEM_P (src) && !matching_memory)
18099 src = force_reg (mode, src);
18101 /* Emit the instruction. */
18103 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
18104 if (reload_in_progress || code == NOT)
18106 /* Reload doesn't know about the flags register, and doesn't know that
18107 it doesn't want to clobber it. */
18108 gcc_assert (code == NOT);
18113 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18114 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18117 /* Fix up the destination if needed. */
18118 if (dst != operands[0])
18119 emit_move_insn (operands[0], dst);
18122 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
18123 divisor are within the range [0-255]. */
/* Split a 32/64-bit div+mod into a runtime dispatch: if both dividend and
   divisor fit in [0, 255], use the much cheaper 8-bit unsigned divide
   (AL = quotient, AH = remainder); otherwise fall through to the original
   full-width signed/unsigned divmod.  */
18126 ix86_split_idivmod (machine_mode mode, rtx operands[],
18129 rtx_code_label *end_label, *qimode_label;
18130 rtx insn, div, mod;
18131 rtx scratch, tmp0, tmp1, tmp2;
18132 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18133 rtx (*gen_zero_extend) (rtx, rtx);
18134 rtx (*gen_test_ccno_1) (rtx, rtx);
/* Select SImode or DImode pattern generators depending on MODE.  */
18139 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18140 gen_test_ccno_1 = gen_testsi_ccno_1;
18141 gen_zero_extend = gen_zero_extendqisi2;
18144 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18145 gen_test_ccno_1 = gen_testdi_ccno_1;
18146 gen_zero_extend = gen_zero_extendqidi2;
18149 gcc_unreachable ();
18152 end_label = gen_label_rtx ();
18153 qimode_label = gen_label_rtx ();
18155 scratch = gen_reg_rtx (mode);
18157 /* Use 8bit unsigned divmod if dividend and divisor are within
18158 the range [0-255]. */
/* (dividend | divisor) & ~0xff == 0 iff both values are in [0, 255].  */
18159 emit_move_insn (scratch, operands[2]);
18160 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18161 scratch, 1, OPTAB_DIRECT);
18162 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
18163 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18164 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18165 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18166 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18168 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
18169 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18170 JUMP_LABEL (insn) = qimode_label;
18172 /* Generate original signed/unsigned divmod. */
18173 div = gen_divmod4_1 (operands[0], operands[1],
18174 operands[2], operands[3]);
18177 /* Branch to the end. */
18178 emit_jump_insn (gen_jump (end_label));
18181 /* Generate 8bit unsigned divide. */
18182 emit_label (qimode_label);
18183 /* Don't use operands[0] for result of 8bit divide since not all
18184 registers support QImode ZERO_EXTRACT. */
18185 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18186 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18187 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18188 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
/* Build REG_EQUAL notes describing quotient and remainder.  */
18192 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18193 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18197 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18198 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18201 /* Extract remainder from AH. */
18202 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18203 if (REG_P (operands[1]))
18204 insn = emit_move_insn (operands[1], tmp1);
18207 /* Need a new scratch register since the old one has result
18209 scratch = gen_reg_rtx (mode);
18210 emit_move_insn (scratch, tmp1);
18211 insn = emit_move_insn (operands[1], scratch);
18213 set_unique_reg_note (insn, REG_EQUAL, mod);
18215 /* Zero extend quotient from AL. */
18216 tmp1 = gen_lowpart (QImode, tmp0);
18217 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18218 set_unique_reg_note (insn, REG_EQUAL, div);
18220 emit_label (end_label);
18223 #define LEA_MAX_STALL (3)
18224 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
18226 /* Increase given DISTANCE in half-cycles according to
18227 dependencies between PREV and NEXT instructions.
18228 Add 1 half-cycle if there is no dependency and
18229 go to next cycle if there is some dependency. */
static unsigned int
/* Advance DISTANCE (in half-cycles) across the PREV->NEXT insn pair:
   +1 half-cycle when the insns are independent, round up to the next
   full cycle (+2 after alignment) when NEXT uses a register that PREV
   defines, or when either insn is missing / has no df info.  */
18232 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
18236 if (!prev || !next)
18237 return distance + (distance & 1) + 2;
18239 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18240 return distance + 1;
18242 FOR_EACH_INSN_USE (use, next)
18243 FOR_EACH_INSN_DEF (def, prev)
18244 if (!DF_REF_IS_ARTIFICIAL (def)
18245 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18246 return distance + (distance & 1) + 2;
18248 return distance + 1;
18251 /* Function checks if instruction INSN defines register number
18252 REGNO1 or REGNO2. */
/* Return true if INSN has a (non-artificial) definition of register
   REGNO1 or REGNO2, according to the df use/def chains.  */
18255 insn_defines_reg (unsigned int regno1, unsigned int regno2,
18260 FOR_EACH_INSN_DEF (def, insn)
18261 if (DF_REF_REG_DEF_P (def)
18262 && !DF_REF_IS_ARTIFICIAL (def)
18263 && (regno1 == DF_REF_REGNO (def)
18264 || regno2 == DF_REF_REGNO (def)))
18270 /* Function checks if instruction INSN uses register number
18271 REGNO as a part of address expression. */
/* Return true if INSN uses register REGNO inside a memory address
   (i.e. as part of an address expression, per DF_REF_REG_MEM_P).  */
18274 insn_uses_reg_mem (unsigned int regno, rtx insn)
18278 FOR_EACH_INSN_USE (use, insn)
18279 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
18285 /* Search backward for non-agu definition of register number REGNO1
18286 or register number REGNO2 in basic block starting from instruction
18287 START up to head of basic block or instruction INSN.
18289 Function puts true value into *FOUND var if definition was found
18290 and false otherwise.
18292 Distance in half-cycles between START and found instruction or head
18293 of BB is added to DISTANCE and returned. */
/* Walk backwards from START (within its basic block, up to INSN or the BB
   head, bounded by LEA_SEARCH_THRESHOLD) looking for a non-LEA (non-AGU)
   definition of REGNO1 or REGNO2.  Sets *FOUND accordingly and returns the
   accumulated half-cycle DISTANCE.  */
18296 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18297 rtx_insn *insn, int distance,
18298 rtx_insn *start, bool *found)
18300 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18301 rtx_insn *prev = start;
18302 rtx_insn *next = NULL;
18308 && distance < LEA_SEARCH_THRESHOLD)
18310 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18312 distance = increase_distance (prev, next, distance);
18313 if (insn_defines_reg (regno1, regno2, prev))
/* Only a definition that is NOT an LEA counts as a non-AGU define.  */
18315 if (recog_memoized (prev) < 0
18316 || get_attr_type (prev) != TYPE_LEA)
18325 if (prev == BB_HEAD (bb))
18328 prev = PREV_INSN (prev);
18334 /* Search backward for non-agu definition of register number REGNO1
18335 or register number REGNO2 in INSN's basic block until
18336 1. Pass LEA_SEARCH_THRESHOLD instructions, or
18337 2. Reach neighbour BBs boundary, or
18338 3. Reach agu definition.
18339 Returns the distance between the non-agu definition point and INSN.
18340 If no definition point, returns -1. */
/* Search backwards from INSN for a non-AGU definition of REGNO1 or REGNO2,
   first in INSN's own block, then (if not found and within threshold) in
   predecessor blocks — using the loop latch for a simple loop, otherwise
   taking the shortest distance over all predecessors.  Returns the distance
   in cycles (half-cycles >> 1).  */
18343 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18346 basic_block bb = BLOCK_FOR_INSN (insn);
18348 bool found = false;
18350 if (insn != BB_HEAD (bb))
18351 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18352 distance, PREV_INSN (insn),
18355 if (!found && distance < LEA_SEARCH_THRESHOLD)
18359 bool simple_loop = false;
18361 FOR_EACH_EDGE (e, ei, bb->preds)
18364 simple_loop = true;
18369 distance = distance_non_agu_define_in_bb (regno1, regno2,
18371 BB_END (bb), &found);
18374 int shortest_dist = -1;
18375 bool found_in_bb = false;
18377 FOR_EACH_EDGE (e, ei, bb->preds)
18380 = distance_non_agu_define_in_bb (regno1, regno2,
18386 if (shortest_dist < 0)
18387 shortest_dist = bb_dist;
18388 else if (bb_dist > 0)
18389 shortest_dist = MIN (bb_dist, shortest_dist);
18395 distance = shortest_dist;
18399 /* get_attr_type may modify recog data. We want to make sure
18400 that recog data is valid for instruction INSN, on which
18401 distance_non_agu_define is called. INSN is unchanged here. */
18402 extract_insn_cached (insn);
/* Convert half-cycles to cycles.  */
18407 return distance >> 1;
18410 /* Return the distance in half-cycles between INSN and the next
18411 insn that uses register number REGNO in memory address added
18412 to DISTANCE. Return -1 if REGNO0 is set.
18414 Put true value into *FOUND if register usage was found and
18416 Put true value into *REDEFINED if register redefinition was
18417 found and false otherwise. */
/* Walk forwards from START (within its basic block, bounded by
   LEA_SEARCH_THRESHOLD) looking for a use of REGNO inside a memory
   address.  Sets *FOUND when such a use is seen, *REDEFINED when REGNO is
   redefined first; returns the accumulated half-cycle DISTANCE.  */
18420 distance_agu_use_in_bb (unsigned int regno,
18421 rtx_insn *insn, int distance, rtx_insn *start,
18422 bool *found, bool *redefined)
18424 basic_block bb = NULL;
18425 rtx_insn *next = start;
18426 rtx_insn *prev = NULL;
18429 *redefined = false;
18431 if (start != NULL_RTX)
18433 bb = BLOCK_FOR_INSN (start);
18434 if (start != BB_HEAD (bb))
18435 /* If insn and start belong to the same bb, set prev to insn,
18436 so the call to increase_distance will increase the distance
18437 between insns by 1. */
18443 && distance < LEA_SEARCH_THRESHOLD)
18445 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18447 distance = increase_distance(prev, next, distance);
18448 if (insn_uses_reg_mem (regno, next))
18450 /* Return DISTANCE if OP0 is used in memory
18451 address in NEXT. */
18456 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18458 /* Return -1 if OP0 is set in NEXT. */
18466 if (next == BB_END (bb))
18469 next = NEXT_INSN (next);
18475 /* Return the distance between INSN and the next insn that uses
18476 register number REGNO0 in memory address. Return -1 if no such
18477 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
/* Search forwards from INSN for a use of REGNO0 in a memory address,
   first in INSN's block, then (if not found/redefined and within
   threshold) in successor blocks — loop header for a simple loop,
   otherwise the shortest distance over all successors.  Returns the
   distance in cycles, or -1 if no use found or REGNO0 is redefined.  */
18480 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18482 basic_block bb = BLOCK_FOR_INSN (insn);
18484 bool found = false;
18485 bool redefined = false;
18487 if (insn != BB_END (bb))
18488 distance = distance_agu_use_in_bb (regno0, insn, distance,
18490 &found, &redefined);
18492 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18496 bool simple_loop = false;
18498 FOR_EACH_EDGE (e, ei, bb->succs)
18501 simple_loop = true;
18506 distance = distance_agu_use_in_bb (regno0, insn,
18507 distance, BB_HEAD (bb),
18508 &found, &redefined);
18511 int shortest_dist = -1;
18512 bool found_in_bb = false;
18513 bool redefined_in_bb = false;
18515 FOR_EACH_EDGE (e, ei, bb->succs)
18518 = distance_agu_use_in_bb (regno0, insn,
18519 distance, BB_HEAD (e->dest),
18520 &found_in_bb, &redefined_in_bb);
18523 if (shortest_dist < 0)
18524 shortest_dist = bb_dist;
18525 else if (bb_dist > 0)
18526 shortest_dist = MIN (bb_dist, shortest_dist);
18532 distance = shortest_dist;
18536 if (!found || redefined)
/* Convert half-cycles to cycles.  */
18539 return distance >> 1;
18542 /* Define this macro to tune LEA priority vs ADD, it take effect when
18543 there is a dilemma of choosing LEA or ADD
18544 Negative value: ADD is more preferred than LEA
18546 Positive value: LEA is more preferred than ADD */
18547 #define IX86_LEA_PRIORITY 0
18549 /* Return true if usage of lea INSN has performance advantage
18550 over a sequence of instructions. Instructions sequence has
18551 SPLIT_COST cycles higher latency than lea latency. */
/* Return true when emitting LEA for INSN (dest REGNO0, sources REGNO1 and
   REGNO2, equivalent-sequence extra latency SPLIT_COST, HAS_SCALE when a
   scale factor is needed) is expected to beat the split ALU sequence.
   Based on the distance to the nearest non-AGU definition of the sources
   and the nearest address use of the destination.  */
18554 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18555 unsigned int regno2, int split_cost, bool has_scale)
18557 int dist_define, dist_use;
18559 /* For Silvermont if using a 2-source or 3-source LEA for
18560 non-destructive destination purposes, or due to wanting
18561 ability to use SCALE, the use of LEA is justified. */
18562 if (TARGET_SILVERMONT || TARGET_INTEL)
18566 if (split_cost < 1)
18568 if (regno0 == regno1 || regno0 == regno2)
18573 dist_define = distance_non_agu_define (regno1, regno2, insn);
18574 dist_use = distance_agu_use (regno0, insn);
18576 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18578 /* If there is no non AGU operand definition, no AGU
18579 operand usage and split cost is 0 then both lea
18580 and non lea variants have same priority. Currently
18581 we prefer lea for 64 bit code and non lea on 32 bit
18583 if (dist_use < 0 && split_cost == 0)
18584 return TARGET_64BIT || IX86_LEA_PRIORITY;
18589 /* With longer definitions distance lea is more preferable.
18590 Here we change it to take into account splitting cost and
18592 dist_define += split_cost + IX86_LEA_PRIORITY;
18594 /* If there is no use in memory address then we just check
18595 that split cost exceeds AGU stall. */
18597 return dist_define > LEA_MAX_STALL;
18599 /* If this insn has both backward non-agu dependence and forward
18600 agu dependence, the one with short distance takes effect. */
18601 return dist_define >= dist_use;
18604 /* Return true if it is legal to clobber flags by INSN and
18605 false otherwise. */
/* Return true if clobbering FLAGS_REG at INSN is safe: no later insn in
   the block uses the flags before they are redefined, and flags are not
   live out of the block.  */
18608 ix86_ok_to_clobber_flags (rtx_insn *insn)
18610 basic_block bb = BLOCK_FOR_INSN (insn);
18616 if (NONDEBUG_INSN_P (insn))
18618 FOR_EACH_INSN_USE (use, insn)
18619 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
/* A later definition of the flags kills the old value: safe.  */
18622 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18626 if (insn == BB_END (bb))
18629 insn = NEXT_INSN (insn);
18632 live = df_get_live_out(bb);
18633 return !REGNO_REG_SET_P (live, FLAGS_REG);
18636 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18637 move and add to avoid AGU stalls. */
/* Return true if op0 = op1 + op2 should be split into mov+add to avoid an
   AGU stall (TARGET_OPT_AGU tuning); requires flags to be clobberable at
   INSN and a non-destructive destination.  */
18640 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18642 unsigned int regno0, regno1, regno2;
18644 /* Check if we need to optimize. */
18645 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18648 /* Check it is correct to split here. */
18649 if (!ix86_ok_to_clobber_flags(insn))
18652 regno0 = true_regnum (operands[0]);
18653 regno1 = true_regnum (operands[1]);
18654 regno2 = true_regnum (operands[2]);
18656 /* We need to split only adds with non destructive
18657 destination operand. */
18658 if (regno0 == regno1 || regno0 == regno2)
/* Split (cost 1 for the extra mov) unless LEA still wins.  */
18661 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18664 /* Return true if we should emit lea instruction instead of mov
/* Return true if a reg-to-reg move should be emitted as LEA (so it
   executes on the AGU) under TARGET_OPT_AGU tuning.  */
18668 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18670 unsigned int regno0, regno1;
18672 /* Check if we need to optimize. */
18673 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18676 /* Use lea for reg to reg moves only. */
18677 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18680 regno0 = true_regnum (operands[0]);
18681 regno1 = true_regnum (operands[1]);
18683 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18686 /* Return true if we need to split lea into a sequence of
18687 instructions to avoid AGU stalls. */
/* Return true if the LEA in INSN should be split into a sequence of ALU
   instructions to avoid AGU stalls (TARGET_AVOID_LEA_FOR_ADDR tuning).
   Computes the extra latency SPLIT_COST of the split sequence from the
   decomposed address and hands the decision to ix86_lea_outperforms.  */
18690 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18692 unsigned int regno0, regno1, regno2;
18694 struct ix86_address parts;
18697 /* Check we need to optimize. */
18698 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18701 /* The "at least two components" test below might not catch simple
18702 move or zero extension insns if parts.base is non-NULL and parts.disp
18703 is const0_rtx as the only components in the address, e.g. if the
18704 register is %rbp or %r13. As this test is much cheaper and moves or
18705 zero extensions are the common case, do this check first. */
18706 if (REG_P (operands[1])
18707 || (SImode_address_operand (operands[1], VOIDmode)
18708 && REG_P (XEXP (operands[1], 0))))
18711 /* Check if it is OK to split here. */
18712 if (!ix86_ok_to_clobber_flags (insn))
18715 ok = ix86_decompose_address (operands[1], &parts);
18718 /* There should be at least two components in the address. */
18719 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18720 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18723 /* We should not split into add if non legitimate pic
18724 operand is used as displacement. */
18725 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18728 regno0 = true_regnum (operands[0]) ;
18729 regno1 = INVALID_REGNUM;
18730 regno2 = INVALID_REGNUM;
18733 regno1 = true_regnum (parts.base);
18735 regno2 = true_regnum (parts.index);
18739 /* Compute how many cycles we will add to execution time
18740 if split lea into a sequence of instructions. */
18741 if (parts.base || parts.index)
18743 /* Have to use mov instruction if non destructive
18744 destination form is used. */
18745 if (regno1 != regno0 && regno2 != regno0)
18748 /* Have to add index to base if both exist. */
18749 if (parts.base && parts.index)
18752 /* Have to use shift and adds if scale is 2 or greater. */
18753 if (parts.scale > 1)
18755 if (regno0 != regno1)
18757 else if (regno2 == regno0)
18760 split_cost += parts.scale;
18763 /* Have to use add instruction with immediate if
18764 disp is non zero. */
18765 if (parts.disp && parts.disp != const0_rtx)
18768 /* Subtract the price of lea. */
18772 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18776 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18777 matches destination. RTX includes clobber of FLAGS_REG. */
/* Emit DST = DST CODE SRC in MODE as a PARALLEL of the SET plus a
   clobber of FLAGS_REG (the two-address x86 form).  */
18780 ix86_emit_binop (enum rtx_code code, machine_mode mode,
18785 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18786 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18788 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18791 /* Return true if regno1 def is nearest to the insn. */
/* Scan backwards from INSN within its basic block; return true if the
   nearest preceding definition is of REGNO1, false if it is of REGNO2
   or if neither register is defined in the block.  */
18794 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18796 rtx_insn *prev = insn;
18797 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18801 while (prev && prev != start)
18803 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18805 prev = PREV_INSN (prev);
18808 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18810 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18812 prev = PREV_INSN (prev);
18815 /* None of the regs is defined in the bb. */
18819 /* Split lea instructions into a sequence of instructions
18820 which are executed on ALU to avoid AGU stalls.
18821 It is assumed that it is allowed to clobber flags register
18822 at lea position. */
/* Split the LEA in INSN (operands[0] = address operands[1], in MODE) into
   an equivalent sequence of moves, adds and shifts executed on the ALU,
   to avoid AGU stalls.  Caller guarantees the flags register may be
   clobbered here.  */
18825 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
18827 unsigned int regno0, regno1, regno2;
18828 struct ix86_address parts;
18832 ok = ix86_decompose_address (operands[1], &parts);
18835 target = gen_lowpart (mode, operands[0]);
18837 regno0 = true_regnum (target);
18838 regno1 = INVALID_REGNUM;
18839 regno2 = INVALID_REGNUM;
18843 parts.base = gen_lowpart (mode, parts.base);
18844 regno1 = true_regnum (parts.base);
18849 parts.index = gen_lowpart (mode, parts.index);
18850 regno2 = true_regnum (parts.index);
18854 parts.disp = gen_lowpart (mode, parts.disp);
18856 if (parts.scale > 1)
18858 /* Case r1 = r1 + ... */
18859 if (regno1 == regno0)
18861 /* If we have a case r1 = r1 + C * r2 then we
18862 should use multiplication which is very
18863 expensive. Assume cost model is wrong if we
18864 have such case here. */
18865 gcc_assert (regno2 != regno0);
/* Replace the scaled index by repeated additions of the index.  */
18867 for (adds = parts.scale; adds > 0; adds--)
18868 ix86_emit_binop (PLUS, mode, target, parts.index);
18872 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18873 if (regno0 != regno2)
18874 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18876 /* Use shift for scaling. */
18877 ix86_emit_binop (ASHIFT, mode, target,
18878 GEN_INT (exact_log2 (parts.scale)));
18881 ix86_emit_binop (PLUS, mode, target, parts.base);
18883 if (parts.disp && parts.disp != const0_rtx)
18884 ix86_emit_binop (PLUS, mode, target, parts.disp);
/* Pure displacement: a plain move of the constant.  */
18887 else if (!parts.base && !parts.index)
18889 gcc_assert(parts.disp);
18890 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18896 if (regno0 != regno2)
18897 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18899 else if (!parts.index)
18901 if (regno0 != regno1)
18902 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18906 if (regno0 == regno1)
18908 else if (regno0 == regno2)
18914 /* Find better operand for SET instruction, depending
18915 on which definition is farther from the insn. */
18916 if (find_nearest_reg_def (insn, regno1, regno2))
18917 tmp = parts.index, tmp1 = parts.base;
18919 tmp = parts.base, tmp1 = parts.index;
18921 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18923 if (parts.disp && parts.disp != const0_rtx)
18924 ix86_emit_binop (PLUS, mode, target, parts.disp);
18926 ix86_emit_binop (PLUS, mode, target, tmp1);
18930 ix86_emit_binop (PLUS, mode, target, tmp);
18933 if (parts.disp && parts.disp != const0_rtx)
18934 ix86_emit_binop (PLUS, mode, target, parts.disp);
18938 /* Return true if it is ok to optimize an ADD operation to LEA
18939 operation to avoid flag register consumation. For most processors,
18940 ADD is faster than LEA. For the processors like BONNELL, if the
18941 destination register of LEA holds an actual address which will be
18942 used soon, LEA is better and otherwise ADD is better. */
/* Operands: 0 = destination, 1 and 2 = the two addends.  true_regnum
   gives the hard register after reload (or the pseudo before it).  */
18945 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
18947 unsigned int regno0 = true_regnum (operands[0]);
18948 unsigned int regno1 = true_regnum (operands[1]);
18949 unsigned int regno2 = true_regnum (operands[2]);
/* Three-operand add: only LEA can express it in one insn.  */
18951 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18952 if (regno0 != regno1 && regno0 != regno2)
/* Without -mtune AGU optimization (or when optimizing for size) plain
   ADD is preferred; otherwise let the cost model decide.  */
18955 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18958 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18961 /* Return true if destination reg of SET_BODY is shift count of
/* Recursive worker: returns true when the register set by SET_BODY is
   used as the shift count of a shift/rotate inside USE_BODY.  Both
   arguments are insn patterns (possibly PARALLELs).  */
18965 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18971 /* Retrieve destination of SET_BODY. */
18972 switch (GET_CODE (set_body))
18975 set_dest = SET_DEST (set_body);
18976 if (!set_dest || !REG_P (set_dest))
/* PARALLEL: recurse over every element looking for a SET.  */
18980 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18981 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18989 /* Retrieve shift count of USE_BODY. */
18990 switch (GET_CODE (use_body))
18993 shift_rtx = XEXP (use_body, 1);
18996 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18997 if (ix86_dep_by_shift_count_body (set_body,
18998 XVECEXP (use_body, 0, i)))
/* Only shift and rotate codes have a count operand we care about.  */
19006 && (GET_CODE (shift_rtx) == ASHIFT
19007 || GET_CODE (shift_rtx) == LSHIFTRT
19008 || GET_CODE (shift_rtx) == ASHIFTRT
19009 || GET_CODE (shift_rtx) == ROTATE
19010 || GET_CODE (shift_rtx) == ROTATERT))
19012 rtx shift_count = XEXP (shift_rtx, 1);
19014 /* Return true if shift count is dest of SET_BODY. */
19015 if (REG_P (shift_count))
19017 /* Add check since it can be invoked before register
19018 allocation in pre-reload schedule. */
19019 if (reload_completed
19020 && true_regnum (set_dest) == true_regnum (shift_count))
/* Before reload, compare pseudo register numbers directly.  */
19022 else if (REGNO(set_dest) == REGNO(shift_count))
19030 /* Return true if destination reg of SET_INSN is shift count of
/* Insn-level wrapper: extract the patterns and delegate to the
   recursive body checker above.  */
19034 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
19036 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
19037 PATTERN (use_insn));
19040 /* Return TRUE or FALSE depending on whether the unary operator meets the
19041 appropriate constraints. */
/* The rtx_code parameter is unused; kept for interface uniformity
   with the binary-operator predicate.  */
19044 ix86_unary_operator_ok (enum rtx_code,
19048 /* If one of operands is memory, source and destination must match. */
19049 if ((MEM_P (operands[0])
19050 || MEM_P (operands[1]))
19051 && ! rtx_equal_p (operands[0], operands[1]))
19056 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
19057 are ok, keeping in mind the possible movddup alternative. */
/* HIGH selects which input must equal a memory destination: operand 2
   for the high-part interleave, operand 1 for the low part.  */
19060 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
19062 if (MEM_P (operands[0]))
19063 return rtx_equal_p (operands[0], operands[1 + high]);
/* Two memory sources are only OK when movddup (SSE3) can load the
   single duplicated operand.  */
19064 if (MEM_P (operands[1]) && MEM_P (operands[2]))
19065 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
19069 /* Post-reload splitter for converting an SF or DFmode value in an
19070 SSE register into an unsigned SImode. */
/* Operands: 0 = result, 1 = scratch ("large" mask), 2 = scratch
   (zero-or-2^31), 3 = FP input, 4 = the 2^31 constant.  The idea:
   values >= 2^31 are reduced by 2^31 before the signed cvttps/cvttpd
   conversion, then the sign bit is restored with an XOR.  */
19073 ix86_split_convert_uns_si_sse (rtx operands[])
19075 machine_mode vecmode;
19076 rtx value, large, zero_or_two31, input, two31, x;
19078 large = operands[1];
19079 zero_or_two31 = operands[2];
19080 input = operands[3];
19081 two31 = operands[4];
19082 vecmode = GET_MODE (large);
19083 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
19085 /* Load up the value into the low element. We must ensure that the other
19086 elements are valid floats -- zero is the easiest such value. */
19089 if (vecmode == V4SFmode)
19090 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
19092 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
/* Input already lives in an SSE register: zero VALUE and merge the
   scalar in via movss/movsd.  */
19096 input = gen_rtx_REG (vecmode, REGNO (input));
19097 emit_move_insn (value, CONST0_RTX (vecmode));
19098 if (vecmode == V4SFmode)
19099 emit_insn (gen_sse_movss (value, value, input));
19101 emit_insn (gen_sse2_movsd (value, value, input));
19104 emit_move_insn (large, two31);
19105 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
/* large = (2^31 <= value) ? all-ones : zero (SSE compare mask).  */
19107 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19108 emit_insn (gen_rtx_SET (VOIDmode, large, x));
/* zero_or_two31 = mask ? 2^31 : 0; then subtract it from value.  */
19110 x = gen_rtx_AND (vecmode, zero_or_two31, large);
19111 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
19113 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19114 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Shift the mask into bit 31 so the XOR below re-adds 2^31 in the
   integer domain exactly when it was subtracted in the FP domain.  */
19116 large = gen_rtx_REG (V4SImode, REGNO (large));
19117 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
19119 x = gen_rtx_REG (V4SImode, REGNO (value));
19120 if (vecmode == V4SFmode)
19121 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19123 emit_insn (gen_sse2_cvttpd2dq (x, value));
19126 emit_insn (gen_xorv4si3 (value, value, large));
19129 /* Convert an unsigned DImode value into a DFmode, using only SSE.
19130 Expects the 64-bit DImode to be supplied in a pair of integral
19131 registers. Requires SSE2; will use SSE3 if available. For x86_32,
19132 -mfpmath=sse, !optimize_size only. */
/* TARGET receives the DFmode result; INPUT is the unsigned DImode
   value (see the block comment above for the exponent-splicing trick:
   lo|0x1.0p52 plus hi|0x1.0p84, biases subtracted, halves summed).  */
19135 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19137 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19138 rtx int_xmm, fp_xmm;
19139 rtx biases, exponents;
19142 int_xmm = gen_reg_rtx (V4SImode);
19143 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19144 emit_insn (gen_movdi_to_sse (int_xmm, input));
19145 else if (TARGET_SSE_SPLIT_REGS)
/* Tell the register allocator the full vector is dead before the
   partial DImode write, avoiding a false dependence.  */
19147 emit_clobber (int_xmm);
19148 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19152 x = gen_reg_rtx (V2DImode);
19153 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19154 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
19157 x = gen_rtx_CONST_VECTOR (V4SImode,
19158 gen_rtvec (4, GEN_INT (0x43300000UL),
19159 GEN_INT (0x45300000UL),
19160 const0_rtx, const0_rtx));
19161 exponents = validize_mem (force_const_mem (V4SImode, x));
19163 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19164 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19166 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19167 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19168 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19169 (0x1.0p84 + double(fp_value_hi_xmm)).
19170 Note these exponents differ by 32. */
19172 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19174 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19175 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
19176 real_ldexp (&bias_lo_rvt, &dconst1, 52);
19177 real_ldexp (&bias_hi_rvt, &dconst1, 84);
19178 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19179 x = const_double_from_real_value (bias_hi_rvt, DFmode);
19180 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19181 biases = validize_mem (force_const_mem (V2DFmode, biases));
19182 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19184 /* Add the upper and lower DFmode values together. */
19186 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
/* No SSE3 haddpd: duplicate the high half and use a plain addpd.  */
19189 x = copy_to_mode_reg (V2DFmode, fp_xmm);
19190 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19191 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19194 ix86_expand_vector_extract (false, target, fp_xmm, 0);
19197 /* Not used, but eases macroization of patterns. */
/* Stub that must never be reached; it exists only so the expander
   macro table has an entry for the XFmode case.  */
19199 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19201 gcc_unreachable ();
19204 /* Convert an unsigned SImode value into a DFmode. Only currently used
19205 for SSE, but applicable anywhere. */
/* Strategy: bias INPUT by -2^31 so it fits a signed SImode convert,
   then add 2^31 back in the DFmode domain (exactly representable).  */
19208 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19210 REAL_VALUE_TYPE TWO31r;
19213 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19214 NULL, 1, OPTAB_DIRECT);
19216 fp = gen_reg_rtx (DFmode);
19217 emit_insn (gen_floatsidf2 (fp, x));
19219 real_ldexp (&TWO31r, &dconst1, 31);
19220 x = const_double_from_real_value (TWO31r, DFmode);
19222 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
/* expand_simple_binop may not have used TARGET; copy if needed.  */
19224 emit_move_insn (target, x);
19227 /* Convert a signed DImode value into a DFmode. Only used for SSE in
19228 32-bit mode; otherwise we have a direct convert instruction. */
/* result = (double)(signed hi) * 2^32 + (double)(unsigned lo);
   the high word carries the sign, the low word is converted as
   unsigned via ix86_expand_convert_uns_sidf_sse.  */
19231 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19233 REAL_VALUE_TYPE TWO32r;
19234 rtx fp_lo, fp_hi, x;
19236 fp_lo = gen_reg_rtx (DFmode);
19237 fp_hi = gen_reg_rtx (DFmode);
19239 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19241 real_ldexp (&TWO32r, &dconst1, 32);
19242 x = const_double_from_real_value (TWO32r, DFmode);
19243 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
19245 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19247 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19250 emit_move_insn (target, x);
19253 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19254 For x86_32, -mfpmath=sse, !optimize_size only. */
/* Split INPUT into 16-bit halves, convert each exactly, then combine:
   result = (float)(input >> 16) * 2^16 + (float)(input & 0xffff).
   Each half fits SFmode's 24-bit mantissa, so no rounding surprises.  */
19256 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19258 REAL_VALUE_TYPE ONE16r;
19259 rtx fp_hi, fp_lo, int_hi, int_lo, x;
19261 real_ldexp (&ONE16r, &dconst1, 16);
19262 x = const_double_from_real_value (ONE16r, SFmode);
19263 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
19264 NULL, 0, OPTAB_DIRECT);
19265 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
19266 NULL, 0, OPTAB_DIRECT);
19267 fp_hi = gen_reg_rtx (SFmode);
19268 fp_lo = gen_reg_rtx (SFmode);
19269 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19270 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19271 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19273 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19275 if (!rtx_equal_p (target, fp_hi))
19276 emit_move_insn (target, fp_hi);
19279 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19280 a vector of unsigned ints VAL to vector of floats TARGET. */
/* Vector analogue of ix86_expand_convert_uns_sisf_sse: per-lane
   16/16-bit split, two exact signed converts, scale-and-add.  */
19283 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19286 REAL_VALUE_TYPE TWO16r;
19287 machine_mode intmode = GET_MODE (val);
19288 machine_mode fltmode = GET_MODE (target);
19289 rtx (*cvt) (rtx, rtx);
19291 if (intmode == V4SImode)
19292 cvt = gen_floatv4siv4sf2;
19294 cvt = gen_floatv8siv8sf2;
/* tmp[0] = splat(0xffff); tmp[1] = low halves; tmp[2] = high halves.  */
19295 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19296 tmp[0] = force_reg (intmode, tmp[0]);
19297 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19299 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19300 NULL_RTX, 1, OPTAB_DIRECT);
19301 tmp[3] = gen_reg_rtx (fltmode);
19302 emit_insn (cvt (tmp[3], tmp[1]));
19303 tmp[4] = gen_reg_rtx (fltmode);
19304 emit_insn (cvt (tmp[4], tmp[2]));
19305 real_ldexp (&TWO16r, &dconst1, 16);
19306 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19307 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19308 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19310 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19312 if (tmp[7] != target)
19313 emit_move_insn (target, tmp[7]);
19316 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19317 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19318 This is done by doing just signed conversion if < 0x1p31, and otherwise by
19319 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
/* Returns the adjusted FP vector; *XORP receives the integer-domain
   correction (0x80000000 per lane where 2^31 was subtracted) to be
   XORed into the truncated result by the caller.  */
19322 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19324 REAL_VALUE_TYPE TWO31r;
19325 rtx two31r, tmp[4];
19326 machine_mode mode = GET_MODE (val);
19327 machine_mode scalarmode = GET_MODE_INNER (mode);
19328 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19329 rtx (*cmp) (rtx, rtx, rtx, rtx);
19332 for (i = 0; i < 3; i++)
19333 tmp[i] = gen_reg_rtx (mode);
19334 real_ldexp (&TWO31r, &dconst1, 31);
19335 two31r = const_double_from_real_value (TWO31r, scalarmode);
19336 two31r = ix86_build_const_vector (mode, 1, two31r);
19337 two31r = force_reg (mode, two31r);
/* Select the right mask-compare pattern for the vector width.  */
19340 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19341 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19342 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19343 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19344 default: gcc_unreachable ();
/* tmp[0] = all-ones mask where 2^31 <= val; tmp[1] = that mask AND
   2^31, i.e. the per-lane amount to subtract.  */
19346 tmp[3] = gen_rtx_LE (mode, two31r, val);
19347 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19348 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19350 if (intmode == V4SImode || TARGET_AVX2)
/* Shift the mask's sign bit into place; cheap when a full-width
   integer shift exists.  */
19351 *xorp = expand_simple_binop (intmode, ASHIFT,
19352 gen_lowpart (intmode, tmp[0]),
19353 GEN_INT (31), NULL_RTX, 0,
/* AVX without AVX2 lacks 256-bit integer shifts: AND with a splatted
   0x80000000 constant instead.  */
19357 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
19358 two31 = ix86_build_const_vector (intmode, 1, two31);
19359 *xorp = expand_simple_binop (intmode, AND,
19360 gen_lowpart (intmode, tmp[0]),
19361 two31, NULL_RTX, 0,
19364 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19368 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19369 then replicate the value for all elements of the vector
/* Build a CONST_VECTOR of MODE whose element 0 is VALUE; remaining
   elements are VALUE when VECT, else zero.  */
19373 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19377 machine_mode scalar_mode;
19400 n_elt = GET_MODE_NUNITS (mode);
19401 v = rtvec_alloc (n_elt);
19402 scalar_mode = GET_MODE_INNER (mode);
19404 RTVEC_ELT (v, 0) = value;
19406 for (i = 1; i < n_elt; ++i)
19407 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19409 return gen_rtx_CONST_VECTOR (mode, v);
/* Non-vector MODE: not supported here.  */
19412 gcc_unreachable ();
19416 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19417 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19418 for an SSE register. If VECT is true, then replicate the mask for
19419 all elements of the vector register. If INVERT is true, then create
19420 a mask excluding the sign bit. */
/* Returns a register holding the mask.  LO/HI together form the
   sign-bit pattern in two HOST_WIDE_INT halves; INVERT complements it
   so that ANDing clears the sign bit instead of isolating it.  */
19423 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19425 machine_mode vec_mode, imode;
19426 HOST_WIDE_INT hi, lo;
19431 /* Find the sign bit, sign extended to 2*HWI. */
19441 mode = GET_MODE_INNER (mode);
/* SFmode: bit 31.  */
19443 lo = 0x80000000, hi = lo < 0;
19453 mode = GET_MODE_INNER (mode);
/* DFmode: bit 63; layout of LO/HI depends on HWI width.  */
19455 if (HOST_BITS_PER_WIDE_INT >= 64)
19456 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19458 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19463 vec_mode = VOIDmode;
19464 if (HOST_BITS_PER_WIDE_INT >= 64)
19467 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19474 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19478 lo = ~lo, hi = ~hi;
19484 mask = immed_double_const (lo, hi, imode);
19486 vec = gen_rtvec (2, v, mask);
19487 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19488 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19495 gcc_unreachable ();
19499 lo = ~lo, hi = ~hi;
19501 /* Force this value into the low part of a fp vector constant. */
19502 mask = immed_double_const (lo, hi, imode);
19503 mask = gen_lowpart (mode, mask);
/* VOIDmode vec_mode means a scalar mask was requested.  */
19505 if (vec_mode == VOIDmode)
19506 return force_reg (mode, mask);
19508 v = ix86_build_const_vector (vec_mode, vect, mask);
19509 return force_reg (vec_mode, v);
19512 /* Generate code for floating point ABS or NEG. */
/* CODE is ABS or NEG; MODE may be scalar or vector FP.  Emits a
   PARALLEL carrying the mask USE (and a flags CLOBBER for the x87
   path) so the post-reload splitter can pick the right form.  */
19515 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19518 rtx mask, set, dst, src;
19519 bool use_sse = false;
19520 bool vector_mode = VECTOR_MODE_P (mode);
19521 machine_mode vmode = mode;
19525 else if (mode == TFmode)
19527 else if (TARGET_SSE_MATH)
19529 use_sse = SSE_FLOAT_MODE_P (mode);
19530 if (mode == SFmode)
19532 else if (mode == DFmode)
19536 /* NEG and ABS performed with SSE use bitwise mask operations.
19537 Create the appropriate mask now. */
/* ABS clears the sign bit (inverted mask); NEG flips it.  */
19539 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19546 set = gen_rtx_fmt_e (code, mode, src);
19547 set = gen_rtx_SET (VOIDmode, dst, set);
19554 use = gen_rtx_USE (VOIDmode, mask);
19556 par = gen_rtvec (2, set, use);
19559 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19560 par = gen_rtvec (3, set, use, clob);
19562 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19568 /* Expand a copysign operation. Special case operand 0 being a constant. */
/* Operands: 0 = dest, 1 = magnitude source, 2 = sign source.  Constant
   magnitudes take the cheaper _const pattern (one mask); variable
   magnitudes need the _var pattern with both mask and inverted mask.  */
19571 ix86_expand_copysign (rtx operands[])
19573 machine_mode mode, vmode;
19574 rtx dest, op0, op1, mask, nmask;
19576 dest = operands[0];
19580 mode = GET_MODE (dest);
19582 if (mode == SFmode)
19584 else if (mode == DFmode)
19589 if (GET_CODE (op0) == CONST_DOUBLE)
19591 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
/* The sign of a constant magnitude is irrelevant; canonicalize to
   its absolute value.  */
19593 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19594 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19596 if (mode == SFmode || mode == DFmode)
19598 if (op0 == CONST0_RTX (mode))
19599 op0 = CONST0_RTX (vmode);
19602 rtx v = ix86_build_const_vector (vmode, false, op0);
19604 op0 = force_reg (vmode, v);
19607 else if (op0 != CONST0_RTX (mode))
19608 op0 = force_reg (mode, op0);
19610 mask = ix86_build_signbit_mask (vmode, 0, 0);
19612 if (mode == SFmode)
19613 copysign_insn = gen_copysignsf3_const;
19614 else if (mode == DFmode)
19615 copysign_insn = gen_copysigndf3_const;
19617 copysign_insn = gen_copysigntf3_const;
19619 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable magnitude: the splitter needs both the sign mask and its
   complement (see ix86_split_copysign_var).  */
19623 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19625 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19626 mask = ix86_build_signbit_mask (vmode, 0, 0);
19628 if (mode == SFmode)
19629 copysign_insn = gen_copysignsf3_var;
19630 else if (mode == DFmode)
19631 copysign_insn = gen_copysigndf3_var;
19633 copysign_insn = gen_copysigntf3_var;
19635 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19639 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19640 be a constant, and so has already been expanded into a vector constant. */
/* dest = (op1 & signmask) | abs-constant.  Operands: 0 = dest,
   1 = vectorized constant magnitude, 3 = sign-bit mask.  */
19643 ix86_split_copysign_const (rtx operands[])
19645 machine_mode mode, vmode;
19646 rtx dest, op0, mask, x;
19648 dest = operands[0];
19650 mask = operands[3];
19652 mode = GET_MODE (dest);
19653 vmode = GET_MODE (mask);
19655 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19656 x = gen_rtx_AND (vmode, dest, mask);
19657 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* ORing in zero is a no-op; skip it.  */
19659 if (op0 != CONST0_RTX (vmode))
19661 x = gen_rtx_IOR (vmode, dest, op0);
19662 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19666 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19667 so we have to do two masks. */
/* dest = (op0 & ~signmask) | (op1 & signmask).  Operands: 0 = dest,
   1 = scratch, 2/3 = op0/op1, 4 = inverted mask, 5 = mask.  Which
   AND form is used depends on the register-allocation alternative
   (which inputs were tied to dest/scratch).  */
19670 ix86_split_copysign_var (rtx operands[])
19672 machine_mode mode, vmode;
19673 rtx dest, scratch, op0, op1, mask, nmask, x;
19675 dest = operands[0];
19676 scratch = operands[1];
19679 nmask = operands[4];
19680 mask = operands[5];
19682 mode = GET_MODE (dest);
19683 vmode = GET_MODE (mask);
19685 if (rtx_equal_p (op0, op1))
19687 /* Shouldn't happen often (it's useless, obviously), but when it does
19688 we'd generate incorrect code if we continue below. */
19689 emit_move_insn (dest, op0);
19693 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19695 gcc_assert (REGNO (op1) == REGNO (scratch));
19697 x = gen_rtx_AND (vmode, scratch, mask);
19698 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
/* dest currently holds the mask: use ANDN-style (~dest) & op0.  */
19701 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19702 x = gen_rtx_NOT (vmode, dest);
19703 x = gen_rtx_AND (vmode, x, op0);
19704 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19708 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19710 x = gen_rtx_AND (vmode, scratch, mask);
19712 else /* alternative 2,4 */
19714 gcc_assert (REGNO (mask) == REGNO (scratch));
19715 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19716 x = gen_rtx_AND (vmode, scratch, op1);
19718 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19720 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19722 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19723 x = gen_rtx_AND (vmode, dest, nmask);
19725 else /* alternative 3,4 */
19727 gcc_assert (REGNO (nmask) == REGNO (dest));
19729 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19730 x = gen_rtx_AND (vmode, dest, op0);
19732 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Combine the masked magnitude and sign halves.  */
19735 x = gen_rtx_IOR (vmode, dest, scratch);
19736 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19739 /* Return TRUE or FALSE depending on whether the first SET in INSN
19740 has source and destination with matching CC modes, and that the
19741 CC mode is at least as constrained as REQ_MODE. */
/* Used by insn predicates: the insn's COMPARE must produce a CC mode
   no weaker than REQ_MODE.  */
19744 ix86_match_ccmode (rtx insn, machine_mode req_mode)
19747 machine_mode set_mode;
19749 set = PATTERN (insn);
19750 if (GET_CODE (set) == PARALLEL)
19751 set = XVECEXP (set, 0, 0);
19752 gcc_assert (GET_CODE (set) == SET);
19753 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19755 set_mode = GET_MODE (SET_DEST (set));
/* CCmode comparisons against zero also satisfy CCNOmode users.  */
19759 if (req_mode != CCNOmode
19760 && (req_mode != CCmode
19761 || XEXP (SET_SRC (set), 1) != const0_rtx))
19765 if (req_mode == CCGCmode)
19769 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19773 if (req_mode == CCZmode)
19783 if (set_mode != req_mode)
19788 gcc_unreachable ();
19791 return GET_MODE (SET_SRC (set)) == set_mode;
19794 /* Generate insn patterns to do an integer compare of OPERANDS. */
/* Emits the flags-setting COMPARE and returns the CODE test rtx on
   the flags register for the eventual bcc/scc/cmov user.  */
19797 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19799 machine_mode cmpmode;
19802 cmpmode = SELECT_CC_MODE (code, op0, op1);
19803 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19805 /* This is very simple, but making the interface the same as in the
19806 FP case makes the rest of the code easier. */
19807 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19808 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19810 /* Return the test that should be put into the flags user, i.e.
19811 the bcc, scc, or cmov instruction. */
19812 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19815 /* Figure out whether to use ordered or unordered fp comparisons.
19816 Return the appropriate mode to use. */
/* The rtx_code parameter is currently unused; every FP comparison
   gets the same mode based only on TARGET_IEEE_FP.  */
19819 ix86_fp_compare_mode (enum rtx_code)
19821 /* ??? In order to make all comparisons reversible, we do all comparisons
19822 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19823 all forms trapping and nontrapping comparisons, we can make inequality
19824 comparisons trapping again, since it results in better code when using
19825 FCOM based compares. */
19826 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the flags mode for comparing OP0 with OP1 under CODE; the
   weakest mode that still captures the needed flag bits enables more
   flags-reuse between neighboring comparisons.  */
19830 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19832 machine_mode mode = GET_MODE (op0);
19834 if (SCALAR_FLOAT_MODE_P (mode))
19836 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19837 return ix86_fp_compare_mode (code);
19842 /* Only zero flag is needed. */
19843 case EQ: /* ZF=0 */
19844 case NE: /* ZF!=0 */
19846 /* Codes needing carry flag. */
19847 case GEU: /* CF=0 */
19848 case LTU: /* CF=1 */
19849 /* Detect overflow checks. They need just the carry flag. */
19850 if (GET_CODE (op0) == PLUS
19851 && rtx_equal_p (op1, XEXP (op0, 0)))
19855 case GTU: /* CF=0 & ZF=0 */
19856 case LEU: /* CF=1 | ZF=1 */
19858 /* Codes possibly doable only with sign flag when
19859 comparing against zero. */
19860 case GE: /* SF=OF or SF=0 */
19861 case LT: /* SF<>OF or SF=1 */
19862 if (op1 == const0_rtx)
19865 /* For other cases Carry flag is not required. */
19867 /* Codes doable only with sign flag when comparing
19868 against zero, but we miss jump instruction for it
19869 so we need to use relational tests against overflow
19870 that thus needs to be zero. */
19871 case GT: /* ZF=0 & SF=OF */
19872 case LE: /* ZF=1 | SF<>OF */
19873 if (op1 == const0_rtx)
19877 /* strcmp pattern do (use flags) and combine may ask us for proper
19882 gcc_unreachable ();
19886 /* Return the fixed registers used for condition codes. */
/* Target hook: report the (fixed) condition-code registers through
   *P1 and *P2 for the cc-reg sharing pass.  */
19889 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19896 /* If two condition code modes are compatible, return a condition code
19897 mode which is compatible with both. Otherwise, return
/* Target hook implementation; see the comment above.  CCGC/CCGOC are
   mutually refinable, and CCZmode is a further subset of both.  */
static machine_mode
19901 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
19906 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19909 if ((m1 == CCGCmode && m2 == CCGOCmode)
19910 || (m1 == CCGOCmode && m2 == CCGCmode))
19913 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19915 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19921 gcc_unreachable ();
19951 /* These are only compatible with themselves, which we already
19958 /* Return a comparison we can do and that it is equivalent to
19959 swap_condition (code) apart possibly from orderedness.
19960 But, never change orderedness if TARGET_IEEE_FP, returning
19961 UNKNOWN in that case if necessary. */
/* Swapping GT<->UNLT etc. flips orderedness; under TARGET_IEEE_FP
   that would change NaN behavior, so refuse with UNKNOWN.  */
static enum rtx_code
19964 ix86_fp_swap_condition (enum rtx_code code)
19968 case GT: /* GTU - CF=0 & ZF=0 */
19969 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19970 case GE: /* GEU - CF=0 */
19971 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19972 case UNLT: /* LTU - CF=1 */
19973 return TARGET_IEEE_FP ? UNKNOWN : GT;
19974 case UNLE: /* LEU - CF=1 | ZF=1 */
19975 return TARGET_IEEE_FP ? UNKNOWN : GE;
/* All other codes swap without affecting orderedness.  */
19977 return swap_condition (code);
19981 /* Return cost of comparison CODE using the best strategy for performance.
19982 All following functions do use number of instructions as a cost metrics.
19983 In future this should be tweaked to compute bytes for optimize_size and
19984 take into account performance of various instructions on various CPUs. */
/* Cost is measured in instruction count; see the comment above.  */
19987 ix86_fp_comparison_cost (enum rtx_code code)
19991 /* The cost of code using bit-twiddling on %ah. */
20008 arith_cost = TARGET_IEEE_FP ? 5 : 4;
20012 arith_cost = TARGET_IEEE_FP ? 6 : 4;
20015 gcc_unreachable ();
20018 switch (ix86_fp_comparison_strategy (code))
20020 case IX86_FPCMP_COMI:
/* fcomi avoids the fnstsw/test sequence; extra insn only when the
   arith fallback would have been expensive anyway.  */
20021 return arith_cost > 4 ? 3 : 2;
20022 case IX86_FPCMP_SAHF:
20023 return arith_cost > 4 ? 4 : 3;
20029 /* Return strategy to use for floating-point. We assume that fcomi is always
20030 preferrable where available, since that is also true when looking at size
20031 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
enum ix86_fpcmp_strategy
/* The rtx_code parameter is unused; strategy depends only on ISA
   availability and tuning flags.  */
20034 ix86_fp_comparison_strategy (enum rtx_code)
20036 /* Do fcomi/sahf based test when profitable. */
20039 return IX86_FPCMP_COMI;
20041 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
20042 return IX86_FPCMP_SAHF;
/* Fall back to fnstsw + arithmetic on %ah.  */
20044 return IX86_FPCMP_ARITH;
20047 /* Swap, force into registers, or otherwise massage the two operands
20048 to a fp comparison. The operands are updated in place; the new
20049 comparison code is returned. */
/* See the comment above: may swap *POP0/*POP1 (returning the swapped
   comparison code) and forces operands into registers or constant
   pool slots as the chosen compare strategy requires.  */
static enum rtx_code
20052 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
20054 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
20055 rtx op0 = *pop0, op1 = *pop1;
20056 machine_mode op_mode = GET_MODE (op0);
20057 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
20059 /* All of the unordered compare instructions only work on registers.
20060 The same is true of the fcomi compare instructions. The XFmode
20061 compare instructions require registers except when comparing
20062 against zero or when converting operand 1 from fixed point to
20066 && (fpcmp_mode == CCFPUmode
20067 || (op_mode == XFmode
20068 && ! (standard_80387_constant_p (op0) == 1
20069 || standard_80387_constant_p (op1) == 1)
20070 && GET_CODE (op1) != FLOAT)
20071 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI)
20073 op0 = force_reg (op_mode, op0);
20074 op1 = force_reg (op_mode, op1);
20078 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
20079 things around if they appear profitable, otherwise force op0
20080 into a register. */
20082 if (standard_80387_constant_p (op0) == 0
20084 && ! (standard_80387_constant_p (op1) == 0
20087 enum rtx_code new_code = ix86_fp_swap_condition (code);
20088 if (new_code != UNKNOWN)
20090 std::swap (op0, op1);
20096 op0 = force_reg (op_mode, op0);
20098 if (CONSTANT_P (op1))
20100 int tmp = standard_80387_constant_p (op1);
/* Not an fld-able constant: spill it to the constant pool so the
   compare can use a memory operand.  */
20102 op1 = validize_mem (force_const_mem (op_mode, op1));
20106 op1 = force_reg (op_mode, op1);
20109 op1 = force_reg (op_mode, op1);
20113 /* Try to rearrange the comparison to make it cheaper. */
20114 if (ix86_fp_comparison_cost (code)
20115 > ix86_fp_comparison_cost (swap_condition (code))
20116 && (REG_P (op1) || can_create_pseudo_p ()))
20118 std::swap (op0, op1);
20119 code = swap_condition (code);
20121 op0 = force_reg (op_mode, op0);
20129 /* Convert comparison codes we use to represent FP comparison to integer
20130 code that will result in proper branch. Return UNKNOWN if no such code
/* Map an FP comparison code onto the integer flags test that the
   branch will actually use; see the comment above.  */
20134 ix86_fp_compare_code_to_integer (enum rtx_code code)
20163 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* Emits the FP comparison for CODE and returns the flags test rtx.
   SCRATCH (HImode) receives the fnstsw status word in the non-COMI
   strategies; a fresh pseudo is created when it is not usable.  */
20166 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20168 machine_mode fpcmp_mode, intcmp_mode;
20171 fpcmp_mode = ix86_fp_compare_mode (code);
20172 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20174 /* Do fcomi/sahf based test when profitable. */
20175 switch (ix86_fp_comparison_strategy (code))
20177 case IX86_FPCMP_COMI:
/* fcomi sets EFLAGS directly; no status-word shuffling needed.  */
20178 intcmp_mode = fpcmp_mode;
20179 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20180 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20185 case IX86_FPCMP_SAHF:
20186 intcmp_mode = fpcmp_mode;
20187 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20188 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20192 scratch = gen_reg_rtx (HImode);
20193 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20194 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20197 case IX86_FPCMP_ARITH:
20198 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
20199 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20200 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20202 scratch = gen_reg_rtx (HImode);
20203 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
20205 /* In the unordered case, we have to check C2 for NaN's, which
20206 doesn't happen to work out to anything nice combination-wise.
20207 So do some bit twiddling on the value we've got in AH to come
20208 up with an appropriate set of condition codes. */
/* Status-word bits used below: 0x01 = C0 (carry-like), 0x04 = C2
   (unordered), 0x40 = C3 (zero-like); 0x45 tests all three.  */
20210 intcmp_mode = CCNOmode;
20215 if (code == GT || !TARGET_IEEE_FP)
20217 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20222 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20223 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20224 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20225 intcmp_mode = CCmode;
20231 if (code == LT && TARGET_IEEE_FP)
20233 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20234 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20235 intcmp_mode = CCmode;
20240 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20246 if (code == GE || !TARGET_IEEE_FP)
20248 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20253 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20254 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20260 if (code == LE && TARGET_IEEE_FP)
20262 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20263 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20264 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20265 intcmp_mode = CCmode;
20270 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20276 if (code == EQ && TARGET_IEEE_FP)
20278 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20279 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20280 intcmp_mode = CCmode;
20285 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20291 if (code == NE && TARGET_IEEE_FP)
20293 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20294 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20300 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20306 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20310 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20315 gcc_unreachable ();
20323 /* Return the test that should be put into the flags user, i.e.
20324 the bcc, scc, or cmov instruction. */
20325 return gen_rtx_fmt_ee (code, VOIDmode,
20326 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand a scalar comparison of OP0 against OP1 using rtx code CODE and
   return the comparison rtx for a flags user (bcc/scc/cmov).
   Dispatch on the mode of OP0: an operand already in a MODE_CC mode is
   wrapped directly; scalar float modes (decimal float is asserted out)
   go through ix86_expand_fp_compare; everything else through
   ix86_expand_int_compare.
   NOTE(review): elided excerpt -- the leading number on each line is the
   original file line number; the declaration of `ret' and the final
   return statement fall in the missing lines.  */
20331 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20335 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20336 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20338 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20340 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20341 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20344 ret = ix86_expand_int_compare (code, op0, op1);
/* Emit RTL for a conditional jump to LABEL when comparison CODE of OP0
   and OP1 is true.  Narrow modes emit a single compare + jump; a
   double-word integer compare is split into compares on its high and
   low halves, emitted recursively in the half-width SUBMODE.
   NOTE(review): elided excerpt -- statements between non-consecutive
   embedded line numbers are missing, so the switch/brace structure shown
   here is incomplete.  */
20350 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20352 machine_mode mode = GET_MODE (op0);
20364 tmp = ix86_expand_compare (code, op0, op1);
20365 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20366 gen_rtx_LABEL_REF (VOIDmode, label),
20368 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20375 /* Expand DImode branch into multiple compare+branch. */
20378 rtx_code_label *label2;
20379 enum rtx_code code1, code2, code3;
20380 machine_mode submode;
/* Canonicalize: keep any constant as the second operand.  */
20382 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20384 std::swap (op0, op1);
20385 code = swap_condition (code);
/* Split both operands into lo/hi half-word pairs.  */
20388 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20389 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20391 submode = mode == DImode ? SImode : DImode;
20393 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20394 avoid two branches. This costs one extra insn, so disable when
20395 optimizing for size. */
20397 if ((code == EQ || code == NE)
20398 && (!optimize_insn_for_size_p ()
20399 || hi[1] == const0_rtx || lo[1] == const0_rtx))
20404 if (hi[1] != const0_rtx)
20405 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20406 NULL_RTX, 0, OPTAB_WIDEN);
20409 if (lo[1] != const0_rtx)
20410 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20411 NULL_RTX, 0, OPTAB_WIDEN);
20413 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20414 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse: the OR of the XORs is zero iff the wide operands are equal.  */
20416 ix86_expand_branch (code, tmp, const0_rtx, label);
20420 /* Otherwise, if we are doing less-than or greater-or-equal-than,
20421 op1 is a constant and the low word is zero, then we can just
20422 examine the high word. Similarly for low word -1 and
20423 less-or-equal-than or greater-than. */
20425 if (CONST_INT_P (hi[1]))
20428 case LT: case LTU: case GE: case GEU:
20429 if (lo[1] == const0_rtx)
20431 ix86_expand_branch (code, hi[0], hi[1], label);
20435 case LE: case LEU: case GT: case GTU:
20436 if (lo[1] == constm1_rtx)
20438 ix86_expand_branch (code, hi[0], hi[1], label);
20446 /* Otherwise, we need two or three jumps. */
20448 label2 = gen_label_rtx ();
20451 code2 = swap_condition (code);
20452 code3 = unsigned_condition (code);
/* Pick the per-half comparison codes; UNKNOWN means "skip that jump".  */
20456 case LT: case GT: case LTU: case GTU:
20459 case LE: code1 = LT; code2 = GT; break;
20460 case GE: code1 = GT; code2 = LT; break;
20461 case LEU: code1 = LTU; code2 = GTU; break;
20462 case GEU: code1 = GTU; code2 = LTU; break;
20464 case EQ: code1 = UNKNOWN; code2 = NE; break;
20465 case NE: code2 = UNKNOWN; break;
20468 gcc_unreachable ();
20473 * if (hi(a) < hi(b)) goto true;
20474 * if (hi(a) > hi(b)) goto false;
20475 * if (lo(a) < lo(b)) goto true;
20479 if (code1 != UNKNOWN)
20480 ix86_expand_branch (code1, hi[0], hi[1], label);
20481 if (code2 != UNKNOWN)
20482 ix86_expand_branch (code2, hi[0], hi[1], label2);
/* Low halves always compare unsigned (code3).  */
20484 ix86_expand_branch (code3, lo[0], lo[1], label);
20486 if (code2 != UNKNOWN)
20487 emit_label (label2);
20492 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20497 /* Split branch based on floating point condition. */
/* Emits a compare via ix86_expand_fp_compare and a conditional jump
   choosing TARGET1/TARGET2.  Canonicalizes so that TARGET2 is pc_rtx,
   swapping targets and reversing CODE (unordered-aware) if needed.
   Attaches a REG_BR_PROB note when a split branch probability is known.
   NOTE(review): elided excerpt; some statements between the embedded
   line numbers are missing.  */
20499 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20500 rtx target1, rtx target2, rtx tmp)
20505 if (target2 != pc_rtx)
20507 std::swap (target1, target2);
20508 code = reverse_condition_maybe_unordered (code);
20511 condition = ix86_expand_fp_compare (code, op1, op2,
20514 i = emit_jump_insn (gen_rtx_SET
20516 gen_rtx_IF_THEN_ELSE (VOIDmode,
20517 condition, target1, target2)));
20518 if (split_branch_probability >= 0)
20519 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
/* Expand a setcc: store into QImode DEST the truth value of OP0 CODE OP1.
   Reuses ix86_expand_compare and forces the comparison rtx to QImode so
   it can be the source of a SET.  */
20523 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20527 gcc_assert (GET_MODE (dest) == QImode);
20529 ret = ix86_expand_compare (code, op0, op1);
20530 PUT_MODE (ret, QImode);
20531 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20534 /* Expand comparison setting or clearing carry flag. Return true when
20535 successful and set pop for the operation. */
/* On success *POP is an LTU or GEU comparison rtx (carry set / clear).
   Integer comparisons are massaged (operand swaps, +1/sign-bit constant
   adjustments) until they become LTU/GEU; float comparisons are expanded
   speculatively and accepted only if they already yield LTU/GEU.
   NOTE(review): elided excerpt; early-return statements and some case
   labels fall in the missing lines.  */
20537 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20539 machine_mode mode =
20540 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20542 /* Do not handle double-mode compares that go through special path. */
20543 if (mode == (TARGET_64BIT ? TImode : DImode))
20546 if (SCALAR_FLOAT_MODE_P (mode))
20549 rtx_insn *compare_seq;
20551 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20553 /* Shortcut: following common codes never translate
20554 into carry flag compares. */
20555 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20556 || code == ORDERED || code == UNORDERED)
20559 /* These comparisons require zero flag; swap operands so they won't. */
20560 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20561 && !TARGET_IEEE_FP)
20563 std::swap (op0, op1);
20564 code = swap_condition (code);
20567 /* Try to expand the comparison and verify that we end up with
20568 carry flag based comparison. This fails to be true only when
20569 we decide to expand comparison using arithmetic that is not
20570 too common scenario. */
20572 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20573 compare_seq = get_insns ();
/* Translate CCFP/CCFPU comparisons back to integer codes before the
   LTU/GEU check below.  */
20576 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20577 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20578 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20580 code = GET_CODE (compare_op);
20582 if (code != LTU && code != GEU)
20585 emit_insn (compare_seq);
20590 if (!INTEGRAL_MODE_P (mode))
20599 /* Convert a==0 into (unsigned)a<1. */
20602 if (op1 != const0_rtx)
20605 code = (code == EQ ? LTU : GEU);
20608 /* Convert a>b into b<a or a>=b-1. */
20611 if (CONST_INT_P (op1))
20613 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20614 /* Bail out on overflow. We still can swap operands but that
20615 would force loading of the constant into register. */
20616 if (op1 == const0_rtx
20617 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20619 code = (code == GTU ? GEU : LTU);
20623 std::swap (op0, op1);
20624 code = (code == GTU ? LTU : GEU);
20628 /* Convert a>=0 into (unsigned)a<0x80000000. */
20631 if (mode == DImode || op1 != const0_rtx)
20633 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20634 code = (code == LT ? GEU : LTU);
20638 if (mode == DImode || op1 != constm1_rtx)
20640 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20641 code = (code == LE ? GEU : LTU);
20647 /* Swapping operands may cause constant to appear as first operand. */
20648 if (!nonimmediate_operand (op0, VOIDmode))
20650 if (!can_create_pseudo_p ())
20652 op0 = force_reg (mode, op0);
20654 *pop = ix86_expand_compare (code, op0, op1);
20655 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20660 ix86_expand_int_movcc (rtx operands[])
20662 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20663 rtx_insn *compare_seq;
20665 machine_mode mode = GET_MODE (operands[0]);
20666 bool sign_bit_compare_p = false;
20667 rtx op0 = XEXP (operands[1], 0);
20668 rtx op1 = XEXP (operands[1], 1);
20670 if (GET_MODE (op0) == TImode
20671 || (GET_MODE (op0) == DImode
20676 compare_op = ix86_expand_compare (code, op0, op1);
20677 compare_seq = get_insns ();
20680 compare_code = GET_CODE (compare_op);
20682 if ((op1 == const0_rtx && (code == GE || code == LT))
20683 || (op1 == constm1_rtx && (code == GT || code == LE)))
20684 sign_bit_compare_p = true;
20686 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20687 HImode insns, we'd be swallowed in word prefix ops. */
20689 if ((mode != HImode || TARGET_FAST_PREFIX)
20690 && (mode != (TARGET_64BIT ? TImode : DImode))
20691 && CONST_INT_P (operands[2])
20692 && CONST_INT_P (operands[3]))
20694 rtx out = operands[0];
20695 HOST_WIDE_INT ct = INTVAL (operands[2]);
20696 HOST_WIDE_INT cf = INTVAL (operands[3]);
20697 HOST_WIDE_INT diff;
20700 /* Sign bit compares are better done using shifts than we do by using
20702 if (sign_bit_compare_p
20703 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20705 /* Detect overlap between destination and compare sources. */
20708 if (!sign_bit_compare_p)
20711 bool fpcmp = false;
20713 compare_code = GET_CODE (compare_op);
20715 flags = XEXP (compare_op, 0);
20717 if (GET_MODE (flags) == CCFPmode
20718 || GET_MODE (flags) == CCFPUmode)
20722 = ix86_fp_compare_code_to_integer (compare_code);
20725 /* To simplify rest of code, restrict to the GEU case. */
20726 if (compare_code == LTU)
20728 std::swap (ct, cf);
20729 compare_code = reverse_condition (compare_code);
20730 code = reverse_condition (code);
20735 PUT_CODE (compare_op,
20736 reverse_condition_maybe_unordered
20737 (GET_CODE (compare_op)));
20739 PUT_CODE (compare_op,
20740 reverse_condition (GET_CODE (compare_op)));
20744 if (reg_overlap_mentioned_p (out, op0)
20745 || reg_overlap_mentioned_p (out, op1))
20746 tmp = gen_reg_rtx (mode);
20748 if (mode == DImode)
20749 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20751 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20752 flags, compare_op));
20756 if (code == GT || code == GE)
20757 code = reverse_condition (code);
20760 std::swap (ct, cf);
20763 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20776 tmp = expand_simple_binop (mode, PLUS,
20778 copy_rtx (tmp), 1, OPTAB_DIRECT);
20789 tmp = expand_simple_binop (mode, IOR,
20791 copy_rtx (tmp), 1, OPTAB_DIRECT);
20793 else if (diff == -1 && ct)
20803 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20805 tmp = expand_simple_binop (mode, PLUS,
20806 copy_rtx (tmp), GEN_INT (cf),
20807 copy_rtx (tmp), 1, OPTAB_DIRECT);
20815 * andl cf - ct, dest
20825 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20828 tmp = expand_simple_binop (mode, AND,
20830 gen_int_mode (cf - ct, mode),
20831 copy_rtx (tmp), 1, OPTAB_DIRECT);
20833 tmp = expand_simple_binop (mode, PLUS,
20834 copy_rtx (tmp), GEN_INT (ct),
20835 copy_rtx (tmp), 1, OPTAB_DIRECT);
20838 if (!rtx_equal_p (tmp, out))
20839 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20846 machine_mode cmp_mode = GET_MODE (op0);
20847 enum rtx_code new_code;
20849 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20851 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20853 /* We may be reversing unordered compare to normal compare, that
20854 is not valid in general (we may convert non-trapping condition
20855 to trapping one), however on i386 we currently emit all
20856 comparisons unordered. */
20857 new_code = reverse_condition_maybe_unordered (code);
20860 new_code = ix86_reverse_condition (code, cmp_mode);
20861 if (new_code != UNKNOWN)
20863 std::swap (ct, cf);
20869 compare_code = UNKNOWN;
20870 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20871 && CONST_INT_P (op1))
20873 if (op1 == const0_rtx
20874 && (code == LT || code == GE))
20875 compare_code = code;
20876 else if (op1 == constm1_rtx)
20880 else if (code == GT)
20885 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20886 if (compare_code != UNKNOWN
20887 && GET_MODE (op0) == GET_MODE (out)
20888 && (cf == -1 || ct == -1))
20890 /* If lea code below could be used, only optimize
20891 if it results in a 2 insn sequence. */
20893 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20894 || diff == 3 || diff == 5 || diff == 9)
20895 || (compare_code == LT && ct == -1)
20896 || (compare_code == GE && cf == -1))
20899 * notl op1 (if necessary)
20907 code = reverse_condition (code);
20910 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20912 out = expand_simple_binop (mode, IOR,
20914 out, 1, OPTAB_DIRECT);
20915 if (out != operands[0])
20916 emit_move_insn (operands[0], out);
20923 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20924 || diff == 3 || diff == 5 || diff == 9)
20925 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20927 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20933 * lea cf(dest*(ct-cf)),dest
20937 * This also catches the degenerate setcc-only case.
20943 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20946 /* On x86_64 the lea instruction operates on Pmode, so we need
20947 to get arithmetics done in proper mode to match. */
20949 tmp = copy_rtx (out);
20953 out1 = copy_rtx (out);
20954 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20958 tmp = gen_rtx_PLUS (mode, tmp, out1);
20964 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20967 if (!rtx_equal_p (tmp, out))
20970 out = force_operand (tmp, copy_rtx (out));
20972 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20974 if (!rtx_equal_p (out, operands[0]))
20975 emit_move_insn (operands[0], copy_rtx (out));
20981 * General case: Jumpful:
20982 * xorl dest,dest cmpl op1, op2
20983 * cmpl op1, op2 movl ct, dest
20984 * setcc dest jcc 1f
20985 * decl dest movl cf, dest
20986 * andl (cf-ct),dest 1:
20989 * Size 20. Size 14.
20991 * This is reasonably steep, but branch mispredict costs are
20992 * high on modern cpus, so consider failing only if optimizing
20996 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20997 && BRANCH_COST (optimize_insn_for_speed_p (),
21002 machine_mode cmp_mode = GET_MODE (op0);
21003 enum rtx_code new_code;
21005 if (SCALAR_FLOAT_MODE_P (cmp_mode))
21007 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
21009 /* We may be reversing unordered compare to normal compare,
21010 that is not valid in general (we may convert non-trapping
21011 condition to trapping one), however on i386 we currently
21012 emit all comparisons unordered. */
21013 new_code = reverse_condition_maybe_unordered (code);
21017 new_code = ix86_reverse_condition (code, cmp_mode);
21018 if (compare_code != UNKNOWN && new_code != UNKNOWN)
21019 compare_code = reverse_condition (compare_code);
21022 if (new_code != UNKNOWN)
21030 if (compare_code != UNKNOWN)
21032 /* notl op1 (if needed)
21037 For x < 0 (resp. x <= -1) there will be no notl,
21038 so if possible swap the constants to get rid of the
21040 True/false will be -1/0 while code below (store flag
21041 followed by decrement) is 0/-1, so the constants need
21042 to be exchanged once more. */
21044 if (compare_code == GE || !cf)
21046 code = reverse_condition (code);
21050 std::swap (ct, cf);
21052 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21056 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21058 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
21060 copy_rtx (out), 1, OPTAB_DIRECT);
21063 out = expand_simple_binop (mode, AND, copy_rtx (out),
21064 gen_int_mode (cf - ct, mode),
21065 copy_rtx (out), 1, OPTAB_DIRECT);
21067 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
21068 copy_rtx (out), 1, OPTAB_DIRECT);
21069 if (!rtx_equal_p (out, operands[0]))
21070 emit_move_insn (operands[0], copy_rtx (out));
21076 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21078 /* Try a few things more with specific constants and a variable. */
21081 rtx var, orig_out, out, tmp;
21083 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
21086 /* If one of the two operands is an interesting constant, load a
21087 constant with the above and mask it in with a logical operation. */
21089 if (CONST_INT_P (operands[2]))
21092 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
21093 operands[3] = constm1_rtx, op = and_optab;
21094 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
21095 operands[3] = const0_rtx, op = ior_optab;
21099 else if (CONST_INT_P (operands[3]))
21102 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21103 operands[2] = constm1_rtx, op = and_optab;
21104 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
21105 operands[2] = const0_rtx, op = ior_optab;
21112 orig_out = operands[0];
21113 tmp = gen_reg_rtx (mode);
21116 /* Recurse to get the constant loaded. */
21117 if (ix86_expand_int_movcc (operands) == 0)
21120 /* Mask in the interesting variable. */
21121 out = expand_binop (mode, op, var, tmp, orig_out, 0,
21123 if (!rtx_equal_p (out, orig_out))
21124 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21130 * For comparison with above,
21140 if (! nonimmediate_operand (operands[2], mode))
21141 operands[2] = force_reg (mode, operands[2]);
21142 if (! nonimmediate_operand (operands[3], mode))
21143 operands[3] = force_reg (mode, operands[3]);
21145 if (! register_operand (operands[2], VOIDmode)
21147 || ! register_operand (operands[3], VOIDmode)))
21148 operands[2] = force_reg (mode, operands[2]);
21151 && ! register_operand (operands[3], VOIDmode))
21152 operands[3] = force_reg (mode, operands[3]);
21154 emit_insn (compare_seq);
21155 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21156 gen_rtx_IF_THEN_ELSE (mode,
21157 compare_op, operands[2],
21162 /* Swap, force into registers, or otherwise massage the two operands
21163 to an sse comparison with a mask result. Thus we differ a bit from
21164 ix86_prepare_fp_compare_args which expects to produce a flags result.
21166 The DEST operand exists to help determine whether to commute commutative
21167 operators. The POP0/POP1 operands are updated in place. The new
21168 comparison code is returned, or UNKNOWN if not implementable. */
/* NOTE(review): elided excerpt -- the switch statement and several case
   labels around the comments below fall in the missing lines.  */
21170 static enum rtx_code
21171 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21172 rtx *pop0, rtx *pop1)
21178 /* AVX supports all the needed comparisons. */
21181 /* We have no LTGT as an operator. We could implement it with
21182 NE & ORDERED, but this requires an extra temporary. It's
21183 not clear that it's worth it. */
21190 /* These are supported directly. */
21197 /* AVX has 3 operand comparisons, no need to swap anything. */
21200 /* For commutative operators, try to canonicalize the destination
21201 operand to be first in the comparison - this helps reload to
21202 avoid extra moves. */
21203 if (!dest || !rtx_equal_p (dest, *pop1))
21211 /* These are not supported directly before AVX, and furthermore
21212 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
21213 comparison operands to transform into something that is
21215 std::swap (*pop0, *pop1);
21216 code = swap_condition (code);
21220 gcc_unreachable ();
21226 /* Detect conditional moves that exactly match min/max operational
21227 semantics. Note that this is IEEE safe, as long as we don't
21228 interchange the operands.
21230 Returns FALSE if this conditional move doesn't match a MIN/MAX,
21231 and TRUE if the operation is successful and instructions are emitted. */
/* NOTE(review): elided excerpt -- the initial check on CODE (the LT
   branch paired with the UNGE swap below) and the early returns are in
   the missing lines.  */
21234 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21235 rtx cmp_op1, rtx if_true, rtx if_false)
21243 else if (code == UNGE)
21244 std::swap (if_true, if_false);
/* The arms must be exactly the comparison operands, in either order;
   the order determines whether this is a MIN or a MAX.  */
21248 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21250 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21255 mode = GET_MODE (dest);
21257 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21258 but MODE may be a vector mode and thus not appropriate. */
21259 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
21261 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21264 if_true = force_reg (mode, if_true);
21265 v = gen_rtvec (2, if_true, if_false);
21266 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Fast-math path: plain SMIN/SMAX is acceptable.  */
21270 code = is_min ? SMIN : SMAX;
21271 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21274 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
21278 /* Expand an sse vector comparison. Return the register with the result. */
/* DEST is the suggested destination (a fresh pseudo is taken when it is
   unsuitable or overlaps OP_TRUE/OP_FALSE); the comparison is
   CMP_OP0 CODE CMP_OP1.  For 64-byte operands (AVX-512) the result is
   an integer mask mode sized by the element count rather than a vector.
   NOTE(review): elided excerpt; some case labels, the `else' arms, and
   the return statement fall in the missing lines.  */
21281 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21282 rtx op_true, rtx op_false)
21284 machine_mode mode = GET_MODE (dest);
21285 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21287 /* In general case result of comparison can differ from operands' type. */
21288 machine_mode cmp_mode;
21290 /* In AVX512F the result of comparison is an integer mask. */
21291 bool maskcmp = false;
21294 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21296 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21297 gcc_assert (cmp_mode != BLKmode);
21302 cmp_mode = cmp_ops_mode;
21305 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21306 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21307 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
21310 || reg_overlap_mentioned_p (dest, op_true)
21311 || reg_overlap_mentioned_p (dest, op_false))
21312 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21314 /* Compare patterns for int modes are unspec in AVX512F only. */
21315 if (maskcmp && (code == GT || code == EQ))
21317 rtx (*gen)(rtx, rtx, rtx);
21319 switch (cmp_ops_mode)
21322 gcc_assert (TARGET_AVX512BW);
21323 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
21326 gcc_assert (TARGET_AVX512BW);
21327 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
21330 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21333 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21341 emit_insn (gen (dest, cmp_op0, cmp_op1));
/* Generic path: emit the comparison as an rtx and SET it into DEST,
   converting when the compare mode differs from the data mode.  */
21345 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21347 if (cmp_mode != mode && !maskcmp)
21349 x = force_reg (cmp_ops_mode, x);
21350 convert_move (dest, x, false);
21353 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21358 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21359 operations. This is used for both scalar and vector conditional moves. */
/* Strategy, in order: direct move of the mask when the arms are all-ones
   and zero; single AND / ANDN / IOR when one arm is a trivial constant;
   XOP vector cmov; SSE4.1/AVX blend or AVX-512 masked blend when a
   pattern exists for the mode; otherwise the generic
   (OP_TRUE & CMP) | (OP_FALSE & ~CMP) three-instruction sequence.
   NOTE(review): elided excerpt -- switch heads, case labels and several
   guard conditions fall in the missing lines.  */
21362 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21364 machine_mode mode = GET_MODE (dest);
21365 machine_mode cmpmode = GET_MODE (cmp);
21367 /* In AVX512F the result of comparison is an integer mask. */
21368 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
21372 if (vector_all_ones_operand (op_true, mode)
21373 && rtx_equal_p (op_false, CONST0_RTX (mode))
21376 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
21378 else if (op_false == CONST0_RTX (mode)
21381 op_true = force_reg (mode, op_true);
21382 x = gen_rtx_AND (mode, cmp, op_true);
21383 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21385 else if (op_true == CONST0_RTX (mode)
21388 op_false = force_reg (mode, op_false);
21389 x = gen_rtx_NOT (mode, cmp);
21390 x = gen_rtx_AND (mode, x, op_false);
21391 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21393 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21396 op_false = force_reg (mode, op_false);
21397 x = gen_rtx_IOR (mode, cmp, op_false);
21398 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21400 else if (TARGET_XOP
21403 op_true = force_reg (mode, op_true);
21405 if (!nonimmediate_operand (op_false, mode))
21406 op_false = force_reg (mode, op_false);
21408 emit_insn (gen_rtx_SET (mode, dest,
21409 gen_rtx_IF_THEN_ELSE (mode, cmp,
21415 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21418 if (!nonimmediate_operand (op_true, mode))
21419 op_true = force_reg (mode, op_true);
21421 op_false = force_reg (mode, op_false);
21427 gen = gen_sse4_1_blendvps;
21431 gen = gen_sse4_1_blendvpd;
21439 gen = gen_sse4_1_pblendvb;
/* Integer blends only exist as byte blends; funnel other 128-bit
   integer modes through V16QImode.  */
21440 if (mode != V16QImode)
21441 d = gen_reg_rtx (V16QImode);
21442 op_false = gen_lowpart (V16QImode, op_false);
21443 op_true = gen_lowpart (V16QImode, op_true);
21444 cmp = gen_lowpart (V16QImode, cmp);
21449 gen = gen_avx_blendvps256;
21453 gen = gen_avx_blendvpd256;
21461 gen = gen_avx2_pblendvb;
21462 if (mode != V32QImode)
21463 d = gen_reg_rtx (V32QImode);
21464 op_false = gen_lowpart (V32QImode, op_false);
21465 op_true = gen_lowpart (V32QImode, op_true);
21466 cmp = gen_lowpart (V32QImode, cmp);
21471 gen = gen_avx512bw_blendmv64qi;
21474 gen = gen_avx512bw_blendmv32hi;
21477 gen = gen_avx512f_blendmv16si;
21480 gen = gen_avx512f_blendmv8di;
21483 gen = gen_avx512f_blendmv8df;
21486 gen = gen_avx512f_blendmv16sf;
21495 emit_insn (gen (d, op_false, op_true, cmp));
21497 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
/* Fallback: (op_true & cmp) | (op_false & ~cmp).  */
21501 op_true = force_reg (mode, op_true);
21503 t2 = gen_reg_rtx (mode);
21505 t3 = gen_reg_rtx (mode);
21509 x = gen_rtx_AND (mode, op_true, cmp);
21510 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21512 x = gen_rtx_NOT (mode, cmp);
21513 x = gen_rtx_AND (mode, x, op_false);
21514 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21516 x = gen_rtx_IOR (mode, t3, t2);
21517 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21522 /* Expand a floating-point conditional move. Return true if successful. */
/* OPERANDS: 0 = dest, 1 = comparison rtx, 2 = true arm, 3 = false arm.
   SSE-math float modes go through the mask-based SSE cmp/min-max/movcc
   helpers; otherwise an x87/integer fcmov-style IF_THEN_ELSE is emitted,
   falling back to a setcc + NE test when the comparison is not directly
   representable by fcmov.
   NOTE(review): elided excerpt; early returns and some guard conditions
   fall in the missing lines.  */
21525 ix86_expand_fp_movcc (rtx operands[])
21527 machine_mode mode = GET_MODE (operands[0]);
21528 enum rtx_code code = GET_CODE (operands[1]);
21529 rtx tmp, compare_op;
21530 rtx op0 = XEXP (operands[1], 0);
21531 rtx op1 = XEXP (operands[1], 1);
21533 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21535 machine_mode cmode;
21537 /* Since we've no cmove for sse registers, don't force bad register
21538 allocation just to gain access to it. Deny movcc when the
21539 comparison mode doesn't match the move mode. */
21540 cmode = GET_MODE (op0);
21541 if (cmode == VOIDmode)
21542 cmode = GET_MODE (op1);
21546 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21547 if (code == UNKNOWN)
21550 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21551 operands[2], operands[3]))
21554 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21555 operands[2], operands[3]);
21556 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21560 if (GET_MODE (op0) == TImode
21561 || (GET_MODE (op0) == DImode
21565 /* The floating point conditional move instructions don't directly
21566 support conditions resulting from a signed integer comparison. */
21568 compare_op = ix86_expand_compare (code, op0, op1);
21569 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21571 tmp = gen_reg_rtx (QImode);
21572 ix86_expand_setcc (tmp, code, op0, op1);
/* Re-test the byte result against zero; NE is fcmov-representable.  */
21574 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21577 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21578 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21579 operands[2], operands[3])));
21584 /* Expand a floating-point vector conditional move; a vcond operation
21585 rather than a movcc operation. */
/* OPERANDS: 0 = dest, 3 = comparison, 4/5 = compare operands,
   1/2 = arms.  LTGT/UNEQ (the codes not directly supported) are built
   from an ORDERED/UNORDERED mask combined with an NE/EQ compare; other
   codes go through min/max detection and then the plain SSE cmp+movcc.
   NOTE(review): elided excerpt; case labels and return statements fall
   in the missing lines.  */
21588 ix86_expand_fp_vcond (rtx operands[])
21590 enum rtx_code code = GET_CODE (operands[3]);
21593 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21594 &operands[4], &operands[5]);
21595 if (code == UNKNOWN)
21598 switch (GET_CODE (operands[3]))
21601 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21602 operands[5], operands[0], operands[0]);
21603 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21604 operands[5], operands[1], operands[2]);
21608 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21609 operands[5], operands[0], operands[0]);
21610 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21611 operands[5], operands[1], operands[2]);
21615 gcc_unreachable ();
/* Combine the two masks with the chosen logical op.  */
21617 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21619 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21623 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21624 operands[5], operands[1], operands[2]))
21627 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21628 operands[1], operands[2]);
21629 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21633 /* Expand a signed/unsigned integral vector conditional move. */
/* OPERANDS: 0 = dest, 3 = comparison, 4/5 = compare operands,
   1/2 = arms.  Special-cases x<0 ? -1/1 : 0 as an arithmetic/logical
   shift of the sign bit; canonicalizes the comparison to EQ/GT/GTU;
   converts unsigned compares into signed ones via sign-bit subtraction
   or saturating subtraction; finally uses the SSE cmp + movcc helpers,
   negating the mask when canonicalization flipped the sense.
   NOTE(review): elided excerpt -- switch heads, early returns and
   several assignments (e.g. to `negate') fall in the missing lines.  */
21636 ix86_expand_int_vcond (rtx operands[])
21638 machine_mode data_mode = GET_MODE (operands[0]);
21639 machine_mode mode = GET_MODE (operands[4]);
21640 enum rtx_code code = GET_CODE (operands[3]);
21641 bool negate = false;
21644 cop0 = operands[4];
21645 cop1 = operands[5];
21647 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21648 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21649 if ((code == LT || code == GE)
21650 && data_mode == mode
21651 && cop1 == CONST0_RTX (mode)
21652 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21653 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21654 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21655 && (GET_MODE_SIZE (data_mode) == 16
21656 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21658 rtx negop = operands[2 - (code == LT)];
21659 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21660 if (negop == CONST1_RTX (data_mode))
21662 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21663 operands[0], 1, OPTAB_DIRECT);
21664 if (res != operands[0])
21665 emit_move_insn (operands[0], res);
21668 else if (GET_MODE_INNER (data_mode) != DImode
21669 && vector_all_ones_operand (negop, data_mode))
21671 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21672 operands[0], 0, OPTAB_DIRECT);
21673 if (res != operands[0])
21674 emit_move_insn (operands[0], res);
21679 if (!nonimmediate_operand (cop1, mode))
21680 cop1 = force_reg (mode, cop1);
21681 if (!general_operand (operands[1], data_mode))
21682 operands[1] = force_reg (data_mode, operands[1]);
21683 if (!general_operand (operands[2], data_mode))
21684 operands[2] = force_reg (data_mode, operands[2]);
21686 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21688 && (mode == V16QImode || mode == V8HImode
21689 || mode == V4SImode || mode == V2DImode))
21693 /* Canonicalize the comparison to EQ, GT, GTU. */
21704 code = reverse_condition (code);
21710 code = reverse_condition (code);
21716 std::swap (cop0, cop1);
21717 code = swap_condition (code);
21721 gcc_unreachable ();
21724 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21725 if (mode == V2DImode)
21730 /* SSE4.1 supports EQ. */
21731 if (!TARGET_SSE4_1)
21737 /* SSE4.2 supports GT/GTU. */
21738 if (!TARGET_SSE4_2)
21743 gcc_unreachable ();
21747 /* Unsigned parallel compare is not supported by the hardware.
21748 Play some tricks to turn this into a signed comparison
21752 cop0 = force_reg (mode, cop0);
21764 rtx (*gen_sub3) (rtx, rtx, rtx);
21768 case V16SImode: gen_sub3 = gen_subv16si3; break;
21769 case V8DImode: gen_sub3 = gen_subv8di3; break;
21770 case V8SImode: gen_sub3 = gen_subv8si3; break;
21771 case V4DImode: gen_sub3 = gen_subv4di3; break;
21772 case V4SImode: gen_sub3 = gen_subv4si3; break;
21773 case V2DImode: gen_sub3 = gen_subv2di3; break;
21775 gcc_unreachable ();
21777 /* Subtract (-(INT MAX) - 1) from both operands to make
21779 mask = ix86_build_signbit_mask (mode, true, false);
21780 t1 = gen_reg_rtx (mode);
21781 emit_insn (gen_sub3 (t1, cop0, mask));
21783 t2 = gen_reg_rtx (mode);
21784 emit_insn (gen_sub3 (t2, cop1, mask));
21798 /* Perform a parallel unsigned saturating subtraction. */
21799 x = gen_reg_rtx (mode);
21800 emit_insn (gen_rtx_SET (VOIDmode, x,
21801 gen_rtx_US_MINUS (mode, cop0, cop1)));
21804 cop1 = CONST0_RTX (mode);
21810 gcc_unreachable ();
21815 /* Allow the comparison to be done in one mode, but the movcc to
21816 happen in another mode. */
21817 if (data_mode == mode)
21819 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21820 operands[1+negate], operands[2-negate]);
21824 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21825 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21826 operands[1+negate], operands[2-negate]);
21827 if (GET_MODE (x) == mode)
21828 x = gen_lowpart (data_mode, x);
/* `negate' swaps the arms so the possibly-inverted mask selects
   correctly.  */
21831 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21832 operands[2-negate]);
21836 /* AVX512F does support 64-byte integer vector operations,
21837 thus the longest vector we are faced with is V64QImode. */
21838 #define MAX_VECT_LEN 64
/* Descriptor for a constant vector permutation to be expanded:
   the destination and one or two source operands, the element selection
   indices, the vector mode, the element count, and whether both source
   operands are the same.
   NOTE(review): elided excerpt; the struct's opening/closing braces fall
   in the missing lines.  */
21840 struct expand_vec_perm_d
21842 rtx target, op0, op1;
21843 unsigned char perm[MAX_VECT_LEN];
21844 machine_mode vmode;
21845 unsigned char nelt;
21846 bool one_operand_p;
/* Try to expand a vector permutation using the AVX-512 VPERMI2 family
   of instructions.  Called from both the constant and the variable
   permute expanders, so the arguments are either packed in D or passed
   directly as TARGET, OP0, MASK, OP1.  A mode-specific gen function is
   selected, each gated by the ISA extension that provides it; for
   floating-point modes the selector MASK is taken in the corresponding
   integer mode (MASKMODE).  Presumably returns false when no suitable
   instruction is enabled -- the failure paths are elided from this
   excerpt; confirm against the full source.  */
21851 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21852 struct expand_vec_perm_d *d)
21854 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21855 expander, so args are either in d, or in op0, op1 etc. */
21856 machine_mode mode = GET_MODE (d ? d->op0 : op0);
21857 machine_mode maskmode = mode;
21858 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
/* 128/256-bit HImode variants need both AVX512VL and AVX512BW.  */
21863 if (TARGET_AVX512VL && TARGET_AVX512BW)
21864 gen = gen_avx512vl_vpermi2varv8hi3;
21867 if (TARGET_AVX512VL && TARGET_AVX512BW)
21868 gen = gen_avx512vl_vpermi2varv16hi3;
/* Byte-element permutes require AVX512VBMI.  */
21871 if (TARGET_AVX512VBMI)
21872 gen = gen_avx512bw_vpermi2varv64qi3;
21875 if (TARGET_AVX512BW)
21876 gen = gen_avx512bw_vpermi2varv32hi3;
21879 if (TARGET_AVX512VL)
21880 gen = gen_avx512vl_vpermi2varv4si3;
21883 if (TARGET_AVX512VL)
21884 gen = gen_avx512vl_vpermi2varv8si3;
21887 if (TARGET_AVX512F)
21888 gen = gen_avx512f_vpermi2varv16si3;
/* Float modes use an integer-mode selector, hence MASKMODE differs.  */
21891 if (TARGET_AVX512VL)
21893 gen = gen_avx512vl_vpermi2varv4sf3;
21894 maskmode = V4SImode;
21898 if (TARGET_AVX512VL)
21900 gen = gen_avx512vl_vpermi2varv8sf3;
21901 maskmode = V8SImode;
21905 if (TARGET_AVX512F)
21907 gen = gen_avx512f_vpermi2varv16sf3;
21908 maskmode = V16SImode;
21912 if (TARGET_AVX512VL)
21913 gen = gen_avx512vl_vpermi2varv2di3;
21916 if (TARGET_AVX512VL)
21917 gen = gen_avx512vl_vpermi2varv4di3;
21920 if (TARGET_AVX512F)
21921 gen = gen_avx512f_vpermi2varv8di3;
21924 if (TARGET_AVX512VL)
21926 gen = gen_avx512vl_vpermi2varv2df3;
21927 maskmode = V2DImode;
21931 if (TARGET_AVX512VL)
21933 gen = gen_avx512vl_vpermi2varv4df3;
21934 maskmode = V4DImode;
21938 if (TARGET_AVX512F)
21940 gen = gen_avx512f_vpermi2varv8df3;
21941 maskmode = V8DImode;
21951 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21952 expander, so args are either in d, or in op0, op1 etc. */
21956 target = d->target;
/* Materialize the constant selector from D->perm as a CONST_VECTOR.  */
21959 for (int i = 0; i < d->nelt; ++i)
21960 vec[i] = GEN_INT (d->perm[i]);
21961 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
21964 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
21968 /* Expand a variable vector permutation. */
/* operands[0] = destination, operands[1] and operands[2] = the two
   input vectors (equal for a one-operand shuffle), operands[3] = the
   variable selector vector.  */
21971 ix86_expand_vec_perm (rtx operands[])
21973 rtx target = operands[0];
21974 rtx op0 = operands[1];
21975 rtx op1 = operands[2];
21976 rtx mask = operands[3];
21977 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21978 machine_mode mode = GET_MODE (op0);
21979 machine_mode maskmode = GET_MODE (mask);
21981 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21983 /* Number of elements in the vector. */
21984 w = GET_MODE_NUNITS (mode);
/* E is the size in bytes of one element.  */
21985 e = GET_MODE_UNIT_SIZE (mode);
21986 gcc_assert (w <= 64);
/* Prefer a single VPERMI2 instruction when the ISA provides one.  */
21988 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
21993 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21995 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21996 a constant shuffle operand. With a tiny bit of effort we can
21997 use VPERMD instead. A re-interpretation stall for V4DFmode is
21998 unfortunate but there's no avoiding it.
21999 Similarly for V16HImode we don't have instructions for variable
22000 shuffling, while for V32QImode we can use after preparing suitable
22001 masks vpshufb; vpshufb; vpermq; vpor. */
22003 if (mode == V16HImode)
22005 maskmode = mode = V32QImode;
22011 maskmode = mode = V8SImode;
22015 t1 = gen_reg_rtx (maskmode);
22017 /* Replicate the low bits of the V4DImode mask into V8SImode:
22019 t1 = { A A B B C C D D }. */
22020 for (i = 0; i < w / 2; ++i)
22021 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
22022 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22023 vt = force_reg (maskmode, vt);
22024 mask = gen_lowpart (maskmode, mask);
22025 if (maskmode == V8SImode)
22026 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
22028 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
22030 /* Multiply the shuffle indices by two.  (t1 + t1 avoids needing a
   shift pattern for the vector mode.) */
22031 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
22034 /* Add one to the odd shuffle indices:
22035 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
22036 for (i = 0; i < w / 2; ++i)
22038 vec[i * 2] = const0_rtx;
22039 vec[i * 2 + 1] = const1_rtx;
22041 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22042 vt = validize_mem (force_const_mem (maskmode, vt));
22043 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
22046 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
22047 operands[3] = mask = t1;
22048 target = gen_reg_rtx (mode);
22049 op0 = gen_lowpart (mode, op0);
22050 op1 = gen_lowpart (mode, op1);
22056 /* The VPERMD and VPERMPS instructions already properly ignore
22057 the high bits of the shuffle elements. No need for us to
22058 perform an AND ourselves. */
22059 if (one_operand_shuffle)
22061 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
22062 if (target != operands[0])
22063 emit_move_insn (operands[0],
22064 gen_lowpart (GET_MODE (operands[0]), target));
/* Two-operand V8SI case: permute each input separately, merged below.  */
22068 t1 = gen_reg_rtx (V8SImode);
22069 t2 = gen_reg_rtx (V8SImode);
22070 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
22071 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
22077 mask = gen_lowpart (V8SImode, mask);
22078 if (one_operand_shuffle)
22079 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
22082 t1 = gen_reg_rtx (V8SFmode);
22083 t2 = gen_reg_rtx (V8SFmode);
22084 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
22085 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
22091 /* By combining the two 128-bit input vectors into one 256-bit
22092 input vector, we can use VPERMD and VPERMPS for the full
22093 two-operand shuffle. */
22094 t1 = gen_reg_rtx (V8SImode);
22095 t2 = gen_reg_rtx (V8SImode);
22096 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
22097 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22098 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
22099 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
22103 t1 = gen_reg_rtx (V8SFmode);
22104 t2 = gen_reg_rtx (V8SImode);
22105 mask = gen_lowpart (V4SImode, mask);
22106 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
22107 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22108 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
22109 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
/* V32QImode: VPSHUFB only shuffles within 128-bit lanes, so build two
   adjusted masks and combine an in-lane and a cross-lane shuffle.  */
22113 t1 = gen_reg_rtx (V32QImode);
22114 t2 = gen_reg_rtx (V32QImode);
22115 t3 = gen_reg_rtx (V32QImode);
22116 vt2 = GEN_INT (-128);
22117 for (i = 0; i < 32; i++)
22119 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22120 vt = force_reg (V32QImode, vt);
22121 for (i = 0; i < 32; i++)
22122 vec[i] = i < 16 ? vt2 : const0_rtx;
22123 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22124 vt2 = force_reg (V32QImode, vt2);
22125 /* From mask create two adjusted masks, which contain the same
22126 bits as mask in the low 7 bits of each vector element.
22127 The first mask will have the most significant bit clear
22128 if it requests element from the same 128-bit lane
22129 and MSB set if it requests element from the other 128-bit lane.
22130 The second mask will have the opposite values of the MSB,
22131 and additionally will have its 128-bit lanes swapped.
22132 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22133 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
22134 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
22135 stands for other 12 bytes. */
22136 /* The bit whether element is from the same lane or the other
22137 lane is bit 4, so shift it up by 3 to the MSB position. */
22138 t5 = gen_reg_rtx (V4DImode);
22139 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22141 /* Clear MSB bits from the mask just in case it had them set. */
22142 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
22143 /* After this t1 will have MSB set for elements from other lane. */
22144 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22145 /* Clear bits other than MSB. */
22146 emit_insn (gen_andv32qi3 (t1, t1, vt));
22147 /* Or in the lower bits from mask into t3. */
22148 emit_insn (gen_iorv32qi3 (t3, t1, t2));
22149 /* And invert MSB bits in t1, so MSB is set for elements from the same
22151 emit_insn (gen_xorv32qi3 (t1, t1, vt));
22152 /* Swap 128-bit lanes in t3. */
22153 t6 = gen_reg_rtx (V4DImode);
22154 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22155 const2_rtx, GEN_INT (3),
22156 const0_rtx, const1_rtx));
22157 /* And or in the lower bits from mask into t1. */
22158 emit_insn (gen_iorv32qi3 (t1, t1, t2));
22159 if (one_operand_shuffle)
22161 /* Each of these shuffles will put 0s in places where
22162 element from the other 128-bit lane is needed, otherwise
22163 will shuffle in the requested value. */
22164 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22165 gen_lowpart (V32QImode, t6)));
22166 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22167 /* For t3 the 128-bit lanes are swapped again. */
22168 t7 = gen_reg_rtx (V4DImode);
22169 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22170 const2_rtx, GEN_INT (3),
22171 const0_rtx, const1_rtx));
22172 /* And oring both together leads to the result. */
22173 emit_insn (gen_iorv32qi3 (target, t1,
22174 gen_lowpart (V32QImode, t7)));
22175 if (target != operands[0])
22176 emit_move_insn (operands[0],
22177 gen_lowpart (GET_MODE (operands[0]), target));
22181 t4 = gen_reg_rtx (V32QImode);
22182 /* Similarly to the above one_operand_shuffle code,
22183 just for repeated twice for each operand. merge_two:
22184 code will merge the two results together. */
22185 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22186 gen_lowpart (V32QImode, t6)));
22187 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22188 gen_lowpart (V32QImode, t6)));
22189 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22190 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22191 t7 = gen_reg_rtx (V4DImode);
22192 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22193 const2_rtx, GEN_INT (3),
22194 const0_rtx, const1_rtx));
22195 t8 = gen_reg_rtx (V4DImode);
22196 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22197 const2_rtx, GEN_INT (3),
22198 const0_rtx, const1_rtx));
22199 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22200 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
/* From here on, only 128-bit (or narrower) vectors are handled.  */
22206 gcc_assert (GET_MODE_SIZE (mode) <= 16);
22213 /* The XOP VPPERM insn supports three inputs. By ignoring the
22214 one_operand_shuffle special case, we avoid creating another
22215 set of constant vectors in memory. */
22216 one_operand_shuffle = false;
22218 /* mask = mask & {2*w-1, ...} */
22219 vt = GEN_INT (2*w - 1);
22223 /* mask = mask & {w-1, ...} */
22224 vt = GEN_INT (w - 1);
22227 for (i = 0; i < w; i++)
22229 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22230 mask = expand_simple_binop (maskmode, AND, mask, vt,
22231 NULL_RTX, 0, OPTAB_DIRECT);
22233 /* For non-QImode operations, convert the word permutation control
22234 into a byte permutation control. */
22235 if (mode != V16QImode)
22237 mask = expand_simple_binop (maskmode, ASHIFT, mask,
22238 GEN_INT (exact_log2 (e)),
22239 NULL_RTX, 0, OPTAB_DIRECT);
22241 /* Convert mask to vector of chars. */
22242 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22244 /* Replicate each of the input bytes into byte positions:
22245 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22246 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22247 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
22248 for (i = 0; i < 16; ++i)
22249 vec[i] = GEN_INT (i/e * e);
22250 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22251 vt = validize_mem (force_const_mem (V16QImode, vt));
22253 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22255 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22257 /* Convert it into the byte positions by doing
22258 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
22259 for (i = 0; i < 16; ++i)
22260 vec[i] = GEN_INT (i % e);
22261 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22262 vt = validize_mem (force_const_mem (V16QImode, vt));
22263 emit_insn (gen_addv16qi3 (mask, mask, vt));
22266 /* The actual shuffle operations all operate on V16QImode. */
22267 op0 = gen_lowpart (V16QImode, op0);
22268 op1 = gen_lowpart (V16QImode, op1);
/* XOP VPPERM handles two sources in one instruction.  */
22272 if (GET_MODE (target) != V16QImode)
22273 target = gen_reg_rtx (V16QImode);
22274 emit_insn (gen_xop_pperm (target, op0, op1, mask));
22275 if (target != operands[0])
22276 emit_move_insn (operands[0],
22277 gen_lowpart (GET_MODE (operands[0]), target));
22279 else if (one_operand_shuffle)
22281 if (GET_MODE (target) != V16QImode)
22282 target = gen_reg_rtx (V16QImode);
22283 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22284 if (target != operands[0])
22285 emit_move_insn (operands[0],
22286 gen_lowpart (GET_MODE (operands[0]), target));
22293 /* Shuffle the two input vectors independently. */
22294 t1 = gen_reg_rtx (V16QImode);
22295 t2 = gen_reg_rtx (V16QImode);
22296 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22297 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22300 /* Then merge them together. The key is whether any given control
22301 element contained a bit set that indicates the second word. */
22302 mask = operands[3];
22304 if (maskmode == V2DImode && !TARGET_SSE4_1)
22306 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
22307 more shuffle to convert the V2DI input mask into a V4SI
22308 input mask. At which point the masking that expand_int_vcond
22309 will work as desired. */
22310 rtx t3 = gen_reg_rtx (V4SImode);
22311 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22312 const0_rtx, const0_rtx,
22313 const2_rtx, const2_rtx));
22315 maskmode = V4SImode;
/* Isolate the "second operand" bit of each control element, then use a
   vector compare-and-select (via ix86_expand_int_vcond) to merge.  */
22319 for (i = 0; i < w; i++)
22321 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22322 vt = force_reg (maskmode, vt);
22323 mask = expand_simple_binop (maskmode, AND, mask, vt,
22324 NULL_RTX, 0, OPTAB_DIRECT);
22326 if (GET_MODE (target) != mode)
22327 target = gen_reg_rtx (mode);
22329 xops[1] = gen_lowpart (mode, t2);
22330 xops[2] = gen_lowpart (mode, t1);
22331 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22334 ok = ix86_expand_int_vcond (xops);
22336 if (target != operands[0])
22337 emit_move_insn (operands[0],
22338 gen_lowpart (GET_MODE (operands[0]), target));
22342 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
22343 true if we should do zero extension, else sign extension. HIGH_P is
22344 true if we want the N/2 high elements, else the low elements. */
22347 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22349 machine_mode imode = GET_MODE (src);
/* Preferred strategy: a single pmovzx/pmovsx-style extend pattern.
   EXTRACT pulls the requested half out of a 256/512-bit source first.  */
22354 rtx (*unpack)(rtx, rtx);
22355 rtx (*extract)(rtx, rtx) = NULL;
22356 machine_mode halfmode = BLKmode;
22362 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22364 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22365 halfmode = V32QImode;
22367 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22371 unpack = gen_avx2_zero_extendv16qiv16hi2;
22373 unpack = gen_avx2_sign_extendv16qiv16hi2;
22374 halfmode = V16QImode;
22376 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22380 unpack = gen_avx512f_zero_extendv16hiv16si2;
22382 unpack = gen_avx512f_sign_extendv16hiv16si2;
22383 halfmode = V16HImode;
22385 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22389 unpack = gen_avx2_zero_extendv8hiv8si2;
22391 unpack = gen_avx2_sign_extendv8hiv8si2;
22392 halfmode = V8HImode;
22394 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22398 unpack = gen_avx512f_zero_extendv8siv8di2;
22400 unpack = gen_avx512f_sign_extendv8siv8di2;
22401 halfmode = V8SImode;
22403 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22407 unpack = gen_avx2_zero_extendv4siv4di2;
22409 unpack = gen_avx2_sign_extendv4siv4di2;
22410 halfmode = V4SImode;
22412 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22416 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22418 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22422 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22424 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22428 unpack = gen_sse4_1_zero_extendv2siv2di2;
22430 unpack = gen_sse4_1_sign_extendv2siv2di2;
22433 gcc_unreachable ();
22436 if (GET_MODE_SIZE (imode) >= 32)
22438 tmp = gen_reg_rtx (halfmode);
22439 emit_insn (extract (tmp, src));
22443 /* Shift higher 8 bytes to lower 8 bytes. */
22444 tmp = gen_reg_rtx (V1TImode);
22445 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22447 tmp = gen_lowpart (imode, tmp);
22452 emit_insn (unpack (dest, tmp));
/* Fallback strategy: widen by interleaving SRC with an extension
   vector -- zeros for zero extension, or (0 > SRC) sign masks.  */
22456 rtx (*unpack)(rtx, rtx, rtx);
22462 unpack = gen_vec_interleave_highv16qi;
22464 unpack = gen_vec_interleave_lowv16qi;
22468 unpack = gen_vec_interleave_highv8hi;
22470 unpack = gen_vec_interleave_lowv8hi;
22474 unpack = gen_vec_interleave_highv4si;
22476 unpack = gen_vec_interleave_lowv4si;
22479 gcc_unreachable ();
22483 tmp = force_reg (imode, CONST0_RTX (imode));
/* 0 > SRC yields all-ones in negative elements: the sign-bit copies
   needed for sign extension.  */
22485 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22486 src, pc_rtx, pc_rtx);
22488 rtx tmp2 = gen_reg_rtx (imode);
22489 emit_insn (unpack (tmp2, src, tmp));
22490 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22494 /* Expand conditional increment or decrement using adc/sbb instructions.
22495 The default case using setcc followed by the conditional move can be
22496 done by generic code. */
22498 ix86_expand_int_addcc (rtx operands[])
22500 enum rtx_code code = GET_CODE (operands[1]);
22502 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22504 rtx val = const0_rtx;
22505 bool fpcmp = false;
22507 rtx op0 = XEXP (operands[1], 0);
22508 rtx op1 = XEXP (operands[1], 1);
/* Only +/-1 can be folded into the carry of an adc/sbb.  */
22510 if (operands[3] != const1_rtx
22511 && operands[3] != constm1_rtx)
/* The comparison must be expressible as a carry-flag test.  */
22513 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22515 code = GET_CODE (compare_op);
22517 flags = XEXP (compare_op, 0);
22519 if (GET_MODE (flags) == CCFPmode
22520 || GET_MODE (flags) == CCFPUmode)
22523 code = ix86_fp_compare_code_to_integer (code);
/* FP comparisons must be reversed with unordered-awareness.  */
22530 PUT_CODE (compare_op,
22531 reverse_condition_maybe_unordered
22532 (GET_CODE (compare_op)));
22534 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22537 mode = GET_MODE (operands[0]);
22539 /* Construct either adc or sbb insn. */
/* sbb subtracts the carry; adc adds it -- pick whichever realizes the
   requested +1/-1 for the given condition sense.  */
22540 if ((code == LTU) == (operands[3] == constm1_rtx))
22545 insn = gen_subqi3_carry;
22548 insn = gen_subhi3_carry;
22551 insn = gen_subsi3_carry;
22554 insn = gen_subdi3_carry;
22557 gcc_unreachable ();
22565 insn = gen_addqi3_carry;
22568 insn = gen_addhi3_carry;
22571 insn = gen_addsi3_carry;
22574 insn = gen_adddi3_carry;
22577 gcc_unreachable ();
22580 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22586 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22587 but works for floating point parameters and nonoffsetable memories.
22588 For pushes, it returns just stack offsets; the values will be saved
22589 in the right order. At most four parts are generated. */
22592 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
/* 32-bit target: SImode parts (XFmode occupies three).  */
22597 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
/* 64-bit target: DImode parts, rounding the odd XFmode size up.  */
22599 size = (GET_MODE_SIZE (mode) + 4) / 8;
22601 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22602 gcc_assert (size >= 2 && size <= 4);
22604 /* Optimize constant pool reference to immediates. This is used by fp
22605 moves, that force all constants to memory to allow combining. */
22606 if (MEM_P (operand) && MEM_READONLY_P (operand))
22608 rtx tmp = maybe_get_pool_constant (operand);
22613 if (MEM_P (operand) && !offsettable_memref_p (operand))
22615 /* The only non-offsetable memories we handle are pushes. */
22616 int ok = push_operand (operand, VOIDmode);
22620 operand = copy_rtx (operand);
22621 PUT_MODE (operand, word_mode);
/* For a push, every part is the same word-sized push destination.  */
22622 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22626 if (GET_CODE (operand) == CONST_VECTOR)
22628 machine_mode imode = int_mode_for_mode (mode);
22629 /* Caution: if we looked through a constant pool memory above,
22630 the operand may actually have a different mode now. That's
22631 ok, since we want to pun this all the way back to an integer. */
22632 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22633 gcc_assert (operand != NULL);
22639 if (mode == DImode)
22640 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22645 if (REG_P (operand))
/* Hard registers are only split this way after reload.  */
22647 gcc_assert (reload_completed);
22648 for (i = 0; i < size; i++)
22649 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22651 else if (offsettable_memref_p (operand))
22653 operand = adjust_address (operand, SImode, 0);
22654 parts[0] = operand;
22655 for (i = 1; i < size; i++)
22656 parts[i] = adjust_address (operand, SImode, 4 * i);
22658 else if (GET_CODE (operand) == CONST_DOUBLE)
22663 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22667 real_to_target (l, &r, mode);
22668 parts[3] = gen_int_mode (l[3], SImode);
22669 parts[2] = gen_int_mode (l[2], SImode);
22672 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22673 long double may not be 80-bit. */
22674 real_to_target (l, &r, mode);
22675 parts[2] = gen_int_mode (l[2], SImode);
22678 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22681 gcc_unreachable ();
22683 parts[1] = gen_int_mode (l[1], SImode);
22684 parts[0] = gen_int_mode (l[0], SImode);
22687 gcc_unreachable ();
/* 64-bit target: split into DImode (plus SImode/DImode upper) parts.  */
22692 if (mode == TImode)
22693 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22694 if (mode == XFmode || mode == TFmode)
22696 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22697 if (REG_P (operand))
22699 gcc_assert (reload_completed);
22700 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22701 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22703 else if (offsettable_memref_p (operand))
22705 operand = adjust_address (operand, DImode, 0);
22706 parts[0] = operand;
22707 parts[1] = adjust_address (operand, upper_mode, 8);
22709 else if (GET_CODE (operand) == CONST_DOUBLE)
22714 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22715 real_to_target (l, &r, mode);
22717 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22718 if (HOST_BITS_PER_WIDE_INT >= 64)
22721 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22722 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22725 parts[0] = immed_double_const (l[0], l[1], DImode);
22727 if (upper_mode == SImode)
22728 parts[1] = gen_int_mode (l[2], SImode);
22729 else if (HOST_BITS_PER_WIDE_INT >= 64)
22732 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22733 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22736 parts[1] = immed_double_const (l[2], l[3], DImode);
22739 gcc_unreachable ();
22746 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22747 Return false when normal moves are needed; true when all required
22748 insns have been emitted. Operands 2-4 contain the input values
22749 in the correct order; operands 5-7 contain the output values. */
22752 ix86_split_long_move (rtx operands[])
22757 int collisions = 0;
22758 machine_mode mode = GET_MODE (operands[0]);
22759 bool collisionparts[4];
22761 /* The DFmode expanders may ask us to move double.
22762 For 64bit target this is single move. By hiding the fact
22763 here we simplify i386.md splitters. */
22764 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22766 /* Optimize constant pool reference to immediates. This is used by
22767 fp moves, that force all constants to memory to allow combining. */
22769 if (MEM_P (operands[1])
22770 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22771 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22772 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22773 if (push_operand (operands[0], VOIDmode))
22775 operands[0] = copy_rtx (operands[0]);
22776 PUT_MODE (operands[0], word_mode);
22779 operands[0] = gen_lowpart (DImode, operands[0]);
22780 operands[1] = gen_lowpart (DImode, operands[1]);
22781 emit_move_insn (operands[0], operands[1]);
22785 /* The only non-offsettable memory we handle is push. */
22786 if (push_operand (operands[0], VOIDmode))
22789 gcc_assert (!MEM_P (operands[0])
22790 || offsettable_memref_p (operands[0]));
/* Split source and destination into word-sized parts.  */
22792 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22793 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22795 /* When emitting push, take care for source operands on the stack. */
22796 if (push && MEM_P (operands[1])
22797 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22799 rtx src_base = XEXP (part[1][nparts - 1], 0);
22801 /* Compensate for the stack decrement by 4. */
22802 if (!TARGET_64BIT && nparts == 3
22803 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22804 src_base = plus_constant (Pmode, src_base, 4);
22806 /* src_base refers to the stack pointer and is
22807 automatically decreased by emitted push. */
22808 for (i = 0; i < nparts; i++)
22809 part[1][i] = change_address (part[1][i],
22810 GET_MODE (part[1][i]), src_base);
22813 /* We need to do copy in the right order in case an address register
22814 of the source overlaps the destination. */
22815 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22819 for (i = 0; i < nparts; i++)
22822 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22823 if (collisionparts[i])
22827 /* Collision in the middle part can be handled by reordering. */
22828 if (collisions == 1 && nparts == 3 && collisionparts [1])
22830 std::swap (part[0][1], part[0][2]);
22831 std::swap (part[1][1], part[1][2]);
22833 else if (collisions == 1
22835 && (collisionparts [1] || collisionparts [2]))
22837 if (collisionparts [1])
22839 std::swap (part[0][1], part[0][2]);
22840 std::swap (part[1][1], part[1][2]);
22844 std::swap (part[0][2], part[0][3]);
22845 std::swap (part[1][2], part[1][3]);
22849 /* If there are more collisions, we can't handle it by reordering.
22850 Do an lea to the last part and use only one colliding move. */
22851 else if (collisions > 1)
22857 base = part[0][nparts - 1];
22859 /* Handle the case when the last part isn't valid for lea.
22860 Happens in 64-bit mode storing the 12-byte XFmode. */
22861 if (GET_MODE (base) != Pmode)
22862 base = gen_rtx_REG (Pmode, REGNO (base));
22864 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22865 part[1][0] = replace_equiv_address (part[1][0], base);
22866 for (i = 1; i < nparts; i++)
22868 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22869 part[1][i] = replace_equiv_address (part[1][i], tmp);
/* Push path: store the upper parts explicitly (the elided code below
   presumably pushes the remaining parts -- confirm in full source).  */
22880 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22881 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22882 stack_pointer_rtx, GEN_INT (-4)));
22883 emit_move_insn (part[0][2], part[1][2]);
22885 else if (nparts == 4)
22887 emit_move_insn (part[0][3], part[1][3]);
22888 emit_move_insn (part[0][2], part[1][2]);
22893 /* In 64bit mode we don't have 32bit push available. In case this is
22894 register, it is OK - we will just use larger counterpart. We also
22895 retype memory - these come from an attempt to avoid REX prefix on
22896 moving of second half of TFmode value. */
22897 if (GET_MODE (part[1][1]) == SImode)
22899 switch (GET_CODE (part[1][1]))
22902 part[1][1] = adjust_address (part[1][1], DImode, 0);
22906 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22910 gcc_unreachable ();
22913 if (GET_MODE (part[1][0]) == SImode)
22914 part[1][0] = part[1][1];
22917 emit_move_insn (part[0][1], part[1][1]);
22918 emit_move_insn (part[0][0], part[1][0]);
22922 /* Choose correct order to not overwrite the source before it is copied. */
22923 if ((REG_P (part[0][0])
22924 && REG_P (part[1][1])
22925 && (REGNO (part[0][0]) == REGNO (part[1][1])
22927 && REGNO (part[0][0]) == REGNO (part[1][2]))
22929 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22931 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Copy high-to-low to avoid clobbering the source.  */
22933 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22935 operands[2 + i] = part[0][j];
22936 operands[6 + i] = part[1][j];
22941 for (i = 0; i < nparts; i++)
22943 operands[2 + i] = part[0][i];
22944 operands[6 + i] = part[1][i];
22948 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22949 if (optimize_insn_for_size_p ())
22951 for (j = 0; j < nparts - 1; j++)
22952 if (CONST_INT_P (operands[6 + j])
22953 && operands[6 + j] != const0_rtx
22954 && REG_P (operands[2 + j]))
22955 for (i = j; i < nparts - 1; i++)
22956 if (CONST_INT_P (operands[7 + i])
22957 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22958 operands[7 + i] = operands[2 + j];
22961 for (i = 0; i < nparts; i++)
22962 emit_move_insn (operands[2 + i], operands[6 + i]);
22967 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22968 left shift by a constant, either using a single shift or
22969 a sequence of add instructions. */
/* Note: MODE is the mode of the full double-word value being split,
   while OPERAND is one half-mode part of it (see the ix86_split_ashl
   call sites passing low[0]/high[0]).  Hence for a DImode split the
   SImode add/shift patterns are selected, and vice versa.  */
22972 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
22974 rtx (*insn)(rtx, rtx, rtx);
/* Small counts: repeated OPERAND += OPERAND beats a shift when the adds
   are cheaper, but not when optimizing for size.  */
22977 || (count * ix86_cost->add <= ix86_cost->shift_const
22978 && !optimize_insn_for_size_p ()))
22980 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22981 while (count-- > 0)
22982 emit_insn (insn (operand, operand, operand));
22986 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22987 emit_insn (insn (operand, operand, GEN_INT (count)));
/* Split a double-word left shift (operands[0] = operands[1] << operands[2])
   into half-word operations.  MODE is the double-word mode (DImode when
   splitting into SImode halves, TImode when splitting into DImode halves).
   SCRATCH, if non-NULL and CMOV is available, is used for the variable
   shift-count adjustment.  */
22992 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
22994 rtx (*gen_ashl3)(rtx, rtx, rtx);
22995 rtx (*gen_shld)(rtx, rtx, rtx);
22996 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22998 rtx low[2], high[2];
/* Constant shift count: the result can be formed directly.  */
23001 if (CONST_INT_P (operands[2]))
23003 split_double_mode (mode, operands, 2, low, high);
23004 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
/* Shifting by at least a half-word: low half becomes zero, high half is
   the low source shifted by the remainder.  */
23006 if (count >= half_width)
23008 emit_move_insn (high[0], low[1]);
23009 emit_move_insn (low[0], const0_rtx);
23011 if (count > half_width)
23012 ix86_expand_ashl_const (high[0], count - half_width, mode);
/* Otherwise use SHLD to feed low bits into the high half.  */
23016 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23018 if (!rtx_equal_p (operands[0], operands[1]))
23019 emit_move_insn (operands[0], operands[1]);
23021 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
23022 ix86_expand_ashl_const (low[0], count, mode);
/* Variable shift count from here on.  */
23027 split_double_mode (mode, operands, 1, low, high);
23029 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23031 if (operands[1] == const1_rtx)
23033 /* Assuming we've chosen QImode capable registers, then 1 << N
23034 can be done with two 32/64-bit shifts, no branches, no cmoves. */
23035 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
23037 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
23039 ix86_expand_clear (low[0]);
23040 ix86_expand_clear (high[0]);
/* Test the half-width bit of the count to decide which half gets
   the 1 before the per-half shifts below.  */
23041 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
23043 d = gen_lowpart (QImode, low[0]);
23044 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23045 s = gen_rtx_EQ (QImode, flags, const0_rtx);
23046 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23048 d = gen_lowpart (QImode, high[0]);
23049 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23050 s = gen_rtx_NE (QImode, flags, const0_rtx);
23051 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23054 /* Otherwise, we can get the same results by manually performing
23055 a bit extract operation on bit 5/6, and then performing the two
23056 shifts. The two methods of getting 0/1 into low/high are exactly
23057 the same size. Avoiding the shift in the bit extract case helps
23058 pentium4 a bit; no one else seems to care much either way. */
23061 machine_mode half_mode;
23062 rtx (*gen_lshr3)(rtx, rtx, rtx);
23063 rtx (*gen_and3)(rtx, rtx, rtx);
23064 rtx (*gen_xor3)(rtx, rtx, rtx);
23065 HOST_WIDE_INT bits;
23068 if (mode == DImode)
23070 half_mode = SImode;
23071 gen_lshr3 = gen_lshrsi3;
23072 gen_and3 = gen_andsi3;
23073 gen_xor3 = gen_xorsi3;
23078 half_mode = DImode;
23079 gen_lshr3 = gen_lshrdi3;
23080 gen_and3 = gen_anddi3;
23081 gen_xor3 = gen_xordi3;
/* Avoid a partial-register zero-extend when it would stall.  */
23085 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
23086 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
23088 x = gen_lowpart (half_mode, operands[2]);
23089 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
/* Extract the half-width bit of the count into high, put its
   complement into low, then shift each half by the count.  */
23091 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
23092 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
23093 emit_move_insn (low[0], high[0]);
23094 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
23097 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23098 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
23102 if (operands[1] == constm1_rtx)
23104 /* For -1 << N, we can avoid the shld instruction, because we
23105 know that we're shifting 0...31/63 ones into a -1. */
23106 emit_move_insn (low[0], constm1_rtx)
;
23107 if (optimize_insn_for_size_p ())
23108 emit_move_insn (high[0], low[0]);
23110 emit_move_insn (high[0], constm1_rtx);
/* General variable-count case: SHLD plus a shift of the low half,
   then fix up when the count exceeds the half width.  */
23114 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23116 if (!rtx_equal_p (operands[0], operands[1]))
23117 emit_move_insn (operands[0], operands[1]);
23119 split_double_mode (mode, operands, 1, low, high);
23120 emit_insn (gen_shld (high[0], low[0], operands[2]));
23123 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
/* Branch-free adjustment via CMOV when a scratch register is given.  */
23125 if (TARGET_CMOVE && scratch)
23127 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23128 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23130 ix86_expand_clear (scratch);
23131 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23135 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23136 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23138 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into operations on the two
   half-word registers.  OPERANDS[0] is the destination, OPERANDS[1] the
   source and OPERANDS[2] the shift count; SCRATCH is an optional spare
   register (enables the cmove-based adjustment below) and MODE is the
   double-word mode being split (DImode halves are SImode, otherwise the
   halves are DImode).  */
23143 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23145 rtx (*gen_ashr3)(rtx, rtx, rtx)
23146 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23147 rtx (*gen_shrd)(rtx, rtx, rtx);
23148 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23150 rtx low[2], high[2];
/* Constant shift counts can be expanded without a shrd instruction.  */
23153 if (CONST_INT_P (operands[2]))
23155 split_double_mode (mode, operands, 2, low, high);
/* Reduce the count modulo the full bit width, matching hardware behavior.  */
23156 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
/* Shifting right by width-1 leaves only the sign bit; smear it into
   both halves with two half-word shifts.  */
23158 if (count == GET_MODE_BITSIZE (mode) - 1)
23160 emit_move_insn (high[0], high[1]);
23161 emit_insn (gen_ashr3 (high[0], high[0],
23162 GEN_INT (half_width - 1)));
23163 emit_move_insn (low[0], high[0]);
/* Count of at least a half-word: the low result is the high input
   (possibly shifted further) and the high result is the sign fill.  */
23166 else if (count >= half_width)
23168 emit_move_insn (low[0], high[1]);
23169 emit_move_insn (high[0], low[0]);
23170 emit_insn (gen_ashr3 (high[0], high[0],
23171 GEN_INT (half_width - 1)));
23173 if (count > half_width)
23174 emit_insn (gen_ashr3 (low[0], low[0],
23175 GEN_INT (count - half_width)));
/* Small constant count: shrd shifts bits from the high half into the
   low half, then the high half is shifted arithmetically.  */
23179 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23181 if (!rtx_equal_p (operands[0], operands[1]))
23182 emit_move_insn (operands[0], operands[1]);
23184 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23185 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
/* Variable shift count: emit shrd/sar and then fix up the case where
   the count is >= half_width.  */
23190 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23192 if (!rtx_equal_p (operands[0], operands[1]))
23193 emit_move_insn (operands[0], operands[1]);
23195 split_double_mode (mode, operands, 1, low, high);
23197 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23198 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
/* With cmove and a scratch register, do the fix-up branchlessly;
   SCRATCH holds the sign-fill value.  */
23200 if (TARGET_CMOVE && scratch)
23202 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23203 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23205 emit_move_insn (scratch, high[0]);
23206 emit_insn (gen_ashr3 (scratch, scratch,
23207 GEN_INT (half_width - 1)));
23208 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
/* Otherwise fall back to the branching adjustment pattern.  */
23213 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23214 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23216 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into operations on the two
   half-word registers.  OPERANDS[0] is the destination, OPERANDS[1] the
   source and OPERANDS[2] the shift count; SCRATCH is an optional spare
   register and MODE is the double-word mode being split.  Mirrors
   ix86_split_ashr, except the vacated high bits are zero-filled.  */
23222 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23224 rtx (*gen_lshr3)(rtx, rtx, rtx)
23225 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23226 rtx (*gen_shrd)(rtx, rtx, rtx);
23227 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23229 rtx low[2], high[2];
/* Constant shift counts need no shrd instruction.  */
23232 if (CONST_INT_P (operands[2]))
23234 split_double_mode (mode, operands, 2, low, high);
/* Reduce the count modulo the full bit width, matching hardware behavior.  */
23235 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
/* Count of at least a half-word: low result is the (further shifted)
   high input; the high result is simply zero.  */
23237 if (count >= half_width)
23239 emit_move_insn (low[0], high[1]);
23240 ix86_expand_clear (high[0]);
23242 if (count > half_width)
23243 emit_insn (gen_lshr3 (low[0], low[0],
23244 GEN_INT (count - half_width)));
/* Small constant count: shrd shifts bits from the high half into the
   low half, then the high half is shifted logically.  */
23248 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23250 if (!rtx_equal_p (operands[0], operands[1]))
23251 emit_move_insn (operands[0], operands[1]);
23253 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23254 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
/* Variable shift count: emit shrd/shr and fix up counts >= half_width.  */
23259 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23261 if (!rtx_equal_p (operands[0], operands[1]))
23262 emit_move_insn (operands[0], operands[1]);
23264 split_double_mode (mode, operands, 1, low, high);
23266 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23267 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
/* With cmove and a scratch register, the fix-up is branchless; SCRATCH
   is cleared to provide the zero fill value.  */
23269 if (TARGET_CMOVE && scratch)
23271 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23272 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23274 ix86_expand_clear (scratch);
23275 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
/* Otherwise fall back to the branching adjustment pattern.  */
23280 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23281 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23283 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
23288 /* Predict just emitted jump instruction to be taken with probability PROB. */
/* PROB is expressed in units of REG_BR_PROB_BASE (callers pass values such
   as REG_BR_PROB_BASE * 90 / 100).  */
23290 predict_jump (int prob)
23292 rtx insn = get_last_insn ();
/* The caller must have just emitted a jump; anything else is a bug.  */
23293 gcc_assert (JUMP_P (insn));
23294 add_int_reg_note (insn, REG_BR_PROB, prob);
/* Helper function for the string operations below.  Test VARIABLE whether
   it is aligned to VALUE bytes.  If true, jump to the label; the returned
   label must be emitted (and its LABEL_NUSES set) by the caller.
   EPILOGUE selects the branch-probability hint used for the jump.  */
23299 static rtx_code_label *
23300 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23302 rtx_code_label *label = gen_label_rtx ();
23303 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* Mask off the low bits; a zero result means VARIABLE is aligned.  */
23304 if (GET_MODE (variable) == DImode)
23305 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23307 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23308 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23311 predict_jump (REG_BR_PROB_BASE * 50 / 100);
23313 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23317 /* Adjust COUNTER by the VALUE. */
/* Note: the adjustment is a decrement — COUNTREG -= VALUE (the addend is
   negated below).  */
23319 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23321 rtx (*gen_add)(rtx, rtx, rtx)
23322 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23324 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23327 /* Zero extend possibly SImode EXP to Pmode register. */
/* force_reg guarantees the result is a (pseudo) register, not a bare
   constant or memory reference.  */
23329 ix86_zero_extend_to_Pmode (rtx exp)
23331 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23334 /* Divide COUNTREG by SCALE. */
/* For a register count, SCALE must be a power of two (the division is
   done with a logical right shift by exact_log2 (SCALE)).  */
23336 scale_counter (rtx countreg, int scale)
/* Constant counts are divided at compile time.  */
23342 if (CONST_INT_P (countreg))
23343 return GEN_INT (INTVAL (countreg) / scale);
23344 gcc_assert (REG_P (countreg));
23346 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23347 GEN_INT (exact_log2 (scale)),
23348 NULL, 1, OPTAB_DIRECT);
23352 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
23353 DImode for constant loop counts. */
23355 static machine_mode
23356 counter_mode (rtx count_exp)
/* A non-VOIDmode expression already carries its own mode.  */
23358 if (GET_MODE (count_exp) != VOIDmode)
23359 return GET_MODE (count_exp);
23360 if (!CONST_INT_P (count_exp))
/* Constants wider than 32 bits need DImode on 64-bit targets; presumably
   SImode is returned otherwise (return statements elided here).  */
23362 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23367 /* Copy the address to a Pmode register. This is used for x32 to
23368 truncate DImode TLS address to a SImode register. */
23371 ix86_copy_addr_to_reg (rtx addr)
23373 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23374 return copy_addr_to_reg (addr);
/* Otherwise only the x32 DImode -> SImode truncation case remains:
   copy to a DImode register and return its low SImode subreg.  */
23377 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23378 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
23382 /* When ISSETMEM is FALSE, output simple loop to move memory pointer to SRCPTR
23383 to DESTPTR via chunks of MODE unrolled UNROLL times, overall size is COUNT
23384 specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set
23385 memory by VALUE (supposed to be in MODE).
23387 The size is rounded down to whole number of chunk size moved at once.
23388 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
/* EXPECTED_SIZE (-1 if unknown) drives the branch-probability hints on the
   loop back-edge.  On return DESTPTR (and SRCPTR for copies) have been
   advanced past the processed bytes.  */
23392 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23393 rtx destptr, rtx srcptr, rtx value,
23394 rtx count, machine_mode mode, int unroll,
23395 int expected_size, bool issetmem)
23397 rtx_code_label *out_label, *top_label;
23399 machine_mode iter_mode = counter_mode (count);
/* Bytes processed per loop iteration.  */
23400 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23401 rtx piece_size = GEN_INT (piece_size_n);
23402 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23406 top_label = gen_label_rtx ();
23407 out_label = gen_label_rtx ();
23408 iter = gen_reg_rtx (iter_mode);
/* SIZE = COUNT rounded down to a multiple of the piece size.  */
23410 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23411 NULL, 1, OPTAB_DIRECT);
23412 /* Those two should combine. */
23413 if (piece_size == const1_rtx)
/* Skip the loop entirely when there is nothing to process.  */
23415 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23417 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23419 emit_move_insn (iter, const0_rtx);
23421 emit_label (top_label);
23423 tmp = convert_modes (Pmode, iter_mode, iter, true);
23425 /* This assert could be relaxed - in this case we'll need to compute
23426 smallest power of two, containing in PIECE_SIZE_N and pass it to
23428 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
/* Address the current chunk as DESTMEM/SRCMEM offset by the iterator.  */
23429 destmem = offset_address (destmem, tmp, piece_size_n);
23430 destmem = adjust_address (destmem, mode, 0);
23434 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23435 srcmem = adjust_address (srcmem, mode, 0);
23437 /* When unrolling for chips that reorder memory reads and writes,
23438 we can save registers by using single temporary.
23439 Also using 4 temporaries is overkill in 32bit mode. */
/* NOTE(review): the "&& 0" makes this register-saving variant dead code;
   the load-all-then-store-all path below is always used.  */
23440 if (!TARGET_64BIT && 0)
23442 for (i = 0; i < unroll; i++)
23447 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23449 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23451 emit_move_insn (destmem, srcmem);
/* Load all UNROLL pieces into temporaries first, then store them, so
   reads are not interleaved with writes.  */
23457 gcc_assert (unroll <= 4);
23458 for (i = 0; i < unroll; i++)
23460 tmpreg[i] = gen_reg_rtx (mode);
23464 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23466 emit_move_insn (tmpreg[i], srcmem);
23468 for (i = 0; i < unroll; i++)
23473 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23475 emit_move_insn (destmem, tmpreg[i]);
/* Setmem case: just store VALUE into each piece.  */
23480 for (i = 0; i < unroll; i++)
23484 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23485 emit_move_insn (destmem, value);
/* Advance the iterator and loop while ITER < SIZE.  */
23488 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23489 true, OPTAB_LIB_WIDEN);
23491 emit_move_insn (iter, tmp);
23493 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Predict the back-edge from the expected iteration count when known.  */
23495 if (expected_size != -1)
23497 expected_size /= GET_MODE_SIZE (mode) * unroll;
23498 if (expected_size == 0)
23500 else if (expected_size > REG_BR_PROB_BASE)
23501 predict_jump (REG_BR_PROB_BASE - 1);
23503 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23506 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Finally advance the pointers past the bytes handled by the loop.  */
23507 iter = ix86_zero_extend_to_Pmode (iter);
23508 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23509 true, OPTAB_LIB_WIDEN);
23510 if (tmp != destptr)
23511 emit_move_insn (destptr, tmp);
23514 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23515 true, OPTAB_LIB_WIDEN);
23517 emit_move_insn (srcptr, tmp);
23519 emit_label (out_label);
23522 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23523 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23524 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23525 For setmem case, VALUE is a promoted to a wider size ORIG_VALUE.
23526 ORIG_VALUE is the original value passed to memset to fill the memory with.
23527 Other arguments have same meaning as for previous function. */
23530 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23531 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23533 machine_mode mode, bool issetmem)
23538 HOST_WIDE_INT rounded_count;
23540 /* If possible, it is shorter to use rep movs.
23541 TODO: Maybe it is better to move this logic to decide_alg. */
/* QImode with a count divisible by 4 (and zero fill for setmem) can be
   widened; presumably MODE is switched to SImode here (line elided).  */
23542 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23543 && (!issetmem || orig_value == const0_rtx))
/* Rewrite DESTMEM as a BLKmode reference based on DESTPTR if needed.  */
23546 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23547 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
/* The rep counter counts MODE-sized elements, not bytes.  */
23549 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23550 GET_MODE_SIZE (mode)));
/* DESTEXP is the final value of the destination pointer, i.e.
   DESTPTR + COUNTREG * GET_MODE_SIZE (MODE).  */
23551 if (mode != QImode)
23553 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23554 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23555 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23558 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
/* Record the known store size (rounded down to whole elements) for
   alias analysis, or drop a stale size.  */
23559 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23561 rounded_count = (INTVAL (count)
23562 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23563 destmem = shallow_copy_rtx (destmem);
23564 set_mem_size (destmem, rounded_count);
23566 else if (MEM_SIZE_KNOWN_P (destmem))
23567 clear_mem_size (destmem);
/* Setmem path: emit rep stos with VALUE narrowed to MODE.  */
23571 value = force_reg (mode, gen_lowpart (mode, value));
23572 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Movmem path: same bookkeeping for the source, then rep mov.  */
23576 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23577 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23578 if (mode != QImode)
23580 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23581 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23582 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23585 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23586 if (CONST_INT_P (count))
23588 rounded_count = (INTVAL (count)
23589 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23590 srcmem = shallow_copy_rtx (srcmem);
23591 set_mem_size (srcmem, rounded_count);
23595 if (MEM_SIZE_KNOWN_P (srcmem))
23596 clear_mem_size (srcmem);
23598 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23603 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23605 SRC is passed by pointer to be updated on return.
23606 Return value is updated DST. */
/* DESTPTR/SRCPTR are the pointer registers and are advanced by
   SIZE_TO_MOVE as a side effect.  */
23608 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23609 HOST_WIDE_INT size_to_move)
23611 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23612 enum insn_code code;
23613 machine_mode move_mode;
23616 /* Find the widest mode in which we could perform moves.
23617 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
23618 it until move of such size is supported. */
23619 piece_size = 1 << floor_log2 (size_to_move);
23620 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23621 code = optab_handler (mov_optab, move_mode);
23622 while (code == CODE_FOR_nothing && piece_size > 1)
23625 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23626 code = optab_handler (mov_optab, move_mode);
23629 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23630 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23631 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23633 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23634 move_mode = mode_for_vector (word_mode, nunits);
23635 code = optab_handler (mov_optab, move_mode);
/* No vector move available: fall back to word-sized scalar moves.  */
23636 if (code == CODE_FOR_nothing)
23638 move_mode = word_mode;
23639 piece_size = GET_MODE_SIZE (move_mode);
23640 code = optab_handler (mov_optab, move_mode);
23643 gcc_assert (code != CODE_FOR_nothing);
23645 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23646 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23648 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23649 gcc_assert (size_to_move % piece_size == 0);
23650 adjust = GEN_INT (piece_size);
23651 for (i = 0; i < size_to_move; i += piece_size)
23653 /* We move from memory to memory, so we'll need to do it via
23654 a temporary register. */
23655 tempreg = gen_reg_rtx (move_mode);
23656 emit_insn (GEN_FCN (code) (tempreg, src));
23657 emit_insn (GEN_FCN (code) (dst, tempreg));
/* Bump both pointers and re-derive the MEM references for the next
   piece.  */
23659 emit_move_insn (destptr,
23660 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23661 emit_move_insn (srcptr,
23662 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23664 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23666 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23670 /* Update DST and SRC rtx. */
23675 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
/* MAX_SIZE must be a power of two (asserted below for the constant case).  */
23677 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23678 rtx destptr, rtx srcptr, rtx count, int max_size)
/* Constant count: emit straight-line moves, one per set bit of the
   residual size, from the largest power of two downwards.  */
23681 if (CONST_INT_P (count))
23683 HOST_WIDE_INT countval = INTVAL (count);
23684 HOST_WIDE_INT epilogue_size = countval % max_size;
23687 /* For now MAX_SIZE should be a power of 2. This assert could be
23688 relaxed, but it'll require a bit more complicated epilogue
23690 gcc_assert ((max_size & (max_size - 1)) == 0);
23691 for (i = max_size; i >= 1; i >>= 1)
23693 if (epilogue_size & i)
23694 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
/* Large variable residue: mask the count and use a byte-copy loop.  */
23700 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23701 count, 1, OPTAB_DIRECT);
23702 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23703 count, QImode, 1, 4, false);
23707 /* When there are stringops, we can cheaply increase dest and src pointers.
23708 Otherwise we save code size by maintaining offset (zero is readily
23709 available from preceding rep operation) and using x86 addressing modes.
/* Variable residue <= 4/2/1 bytes: test each bit of COUNT with an
   aligntest and conditionally move that many bytes.  */
23711 if (TARGET_SINGLE_STRINGOP)
23715 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23716 src = change_address (srcmem, SImode, srcptr);
23717 dest = change_address (destmem, SImode, destptr);
23718 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23719 emit_label (label);
23720 LABEL_NUSES (label) = 1;
23724 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23725 src = change_address (srcmem, HImode, srcptr);
23726 dest = change_address (destmem, HImode, destptr);
23727 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23728 emit_label (label);
23729 LABEL_NUSES (label) = 1;
23733 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23734 src = change_address (srcmem, QImode, srcptr);
23735 dest = change_address (destmem, QImode, destptr);
23736 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23737 emit_label (label);
23738 LABEL_NUSES (label) = 1;
/* No single string op: keep the pointers fixed and track an OFFSET
   register instead, folded into the addressing modes.  */
23743 rtx offset = force_reg (Pmode, const0_rtx);
23748 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23749 src = change_address (srcmem, SImode, srcptr);
23750 dest = change_address (destmem, SImode, destptr);
23751 emit_move_insn (dest, src);
23752 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23753 true, OPTAB_LIB_WIDEN);
23755 emit_move_insn (offset, tmp);
23756 emit_label (label);
23757 LABEL_NUSES (label) = 1;
23761 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23762 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23763 src = change_address (srcmem, HImode, tmp);
23764 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23765 dest = change_address (destmem, HImode, tmp);
23766 emit_move_insn (dest, src);
23767 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23768 true, OPTAB_LIB_WIDEN);
23770 emit_move_insn (offset, tmp);
23771 emit_label (label);
23772 LABEL_NUSES (label) = 1;
23776 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23777 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23778 src = change_address (srcmem, QImode, tmp);
23779 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23780 dest = change_address (destmem, QImode, tmp);
23781 emit_move_insn (dest, src);
23782 emit_label (label);
23783 LABEL_NUSES (label) = 1;
23788 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23789 with value PROMOTED_VAL.
23790 SRC is passed by pointer to be updated on return.
23791 Return value is updated DST. */
/* DESTPTR is the destination pointer register and is advanced by
   SIZE_TO_MOVE as a side effect.  */
23793 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23794 HOST_WIDE_INT size_to_move)
23796 rtx dst = destmem, adjust;
23797 enum insn_code code;
23798 machine_mode move_mode;
23801 /* Find the widest mode in which we could perform moves.
23802 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
23803 it until move of such size is supported. */
23804 move_mode = GET_MODE (promoted_val);
23805 if (move_mode == VOIDmode)
23806 move_mode = QImode;
/* Narrow the promoted value if the block is smaller than its mode.  */
23807 if (size_to_move < GET_MODE_SIZE (move_mode))
23809 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23810 promoted_val = gen_lowpart (move_mode, promoted_val);
23812 piece_size = GET_MODE_SIZE (move_mode);
23813 code = optab_handler (mov_optab, move_mode);
23814 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23816 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23818 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23819 gcc_assert (size_to_move % piece_size == 0);
23820 adjust = GEN_INT (piece_size);
23821 for (i = 0; i < size_to_move; i += piece_size)
/* strset advances DESTPTR itself; for wider (vector) pieces store
   directly and bump the pointer manually.  */
23823 if (piece_size <= GET_MODE_SIZE (word_mode))
23825 emit_insn (gen_strset (destptr, dst, promoted_val));
23826 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23831 emit_insn (GEN_FCN (code) (dst, promoted_val));
23833 emit_move_insn (destptr,
23834 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23836 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23840 /* Update DST rtx. */
23843 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* Helper for expand_setmem_epilogue: masks COUNT down to the residue and
   fills it with a QImode store loop.  */
23845 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23846 rtx count, int max_size)
23849 expand_simple_binop (counter_mode (count), AND, count,
23850 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23851 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23852 gen_lowpart (QImode, value), count, QImode,
23853 1, max_size / 2, true);
23856 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* VALUE is the scalar promoted fill value; VEC_VALUE, when non-NULL, is a
   wider vector version used for the larger constant-size pieces.  */
23858 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23859 rtx count, int max_size)
/* Constant count: straight-line stores, one per set bit of the residue.  */
23863 if (CONST_INT_P (count))
23865 HOST_WIDE_INT countval = INTVAL (count);
23866 HOST_WIDE_INT epilogue_size = countval % max_size;
23869 /* For now MAX_SIZE should be a power of 2. This assert could be
23870 relaxed, but it'll require a bit more complicated epilogue
23872 gcc_assert ((max_size & (max_size - 1)) == 0);
23873 for (i = max_size; i >= 1; i >>= 1)
23875 if (epilogue_size & i)
/* Prefer the vector value for pieces wider than the scalar value.  */
23877 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23878 destmem = emit_memset (destmem, destptr, vec_value, i);
23880 destmem = emit_memset (destmem, destptr, value, i);
/* Large variable residue: fall back to the loop-based epilogue.  */
23887 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Variable residue: test each power-of-two bit of COUNT and store that
   many bytes conditionally; 16 bytes use two DImode or four SImode
   strset stores depending on target (branch lines elided).  */
23892 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23895 dest = change_address (destmem, DImode, destptr);
23896 emit_insn (gen_strset (destptr, dest, value));
23897 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23898 emit_insn (gen_strset (destptr, dest, value));
23902 dest = change_address (destmem, SImode, destptr);
23903 emit_insn (gen_strset (destptr, dest, value));
23904 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23905 emit_insn (gen_strset (destptr, dest, value));
23906 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23907 emit_insn (gen_strset (destptr, dest, value));
23908 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23909 emit_insn (gen_strset (destptr, dest, value));
23911 emit_label (label);
23912 LABEL_NUSES (label) = 1;
23916 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
23919 dest = change_address (destmem, DImode, destptr);
23920 emit_insn (gen_strset (destptr, dest, value));
23924 dest = change_address (destmem, SImode, destptr);
23925 emit_insn (gen_strset (destptr, dest, value));
23926 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23927 emit_insn (gen_strset (destptr, dest, value));
23929 emit_label (label);
23930 LABEL_NUSES (label) = 1;
23934 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23935 dest = change_address (destmem, SImode, destptr);
23936 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23937 emit_label (label);
23938 LABEL_NUSES (label) = 1;
23942 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23943 dest = change_address (destmem, HImode, destptr);
23944 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23945 emit_label (label);
23946 LABEL_NUSES (label) = 1;
23950 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23951 dest = change_address (destmem, QImode, destptr);
23952 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23953 emit_label (label);
23954 LABEL_NUSES (label) = 1;
23958 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
23959 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
23960 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23962 Return value is updated DESTMEM. */
23964 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23965 rtx destptr, rtx srcptr, rtx value,
23966 rtx vec_value, rtx count, int align,
23967 int desired_alignment, bool issetmem)
/* For each power of two between ALIGN and DESIRED_ALIGNMENT, test the
   destination pointer and conditionally handle I bytes to advance it to
   the next alignment level; COUNT is reduced accordingly.  */
23970 for (i = 1; i < desired_alignment; i <<= 1)
23974 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
23977 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23978 destmem = emit_memset (destmem, destptr, vec_value, i);
23980 destmem = emit_memset (destmem, destptr, value, i);
23983 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23984 ix86_adjust_counter (count, i);
23985 emit_label (label);
23986 LABEL_NUSES (label) = 1;
/* After the test for alignment I, the pointer is known aligned to 2*I.  */
23987 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23993 /* Test if COUNT&SIZE is nonzero and if so, expand movme
23994 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23995 and jump to DONE_LABEL. */
/* The SIZE..2*SIZE-1 range is covered by one forward run of SIZE bytes
   plus one overlapping run ending at DESTPTR + COUNT.  */
23997 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23998 rtx destptr, rtx srcptr,
23999 rtx value, rtx vec_value,
24000 rtx count, int size,
24001 rtx done_label, bool issetmem)
24003 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
24004 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
24008 /* If we do not have vector value to copy, we must reduce size. */
24013 if (GET_MODE (value) == VOIDmode && size > 8)
24015 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
24016 mode = GET_MODE (value);
24019 mode = GET_MODE (vec_value), value = vec_value;
24023 /* Choose appropriate vector mode. */
24025 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
24026 else if (size >= 16)
24027 mode = TARGET_SSE ? V16QImode : DImode;
24028 srcmem = change_address (srcmem, mode, srcptr);
24030 destmem = change_address (destmem, mode, destptr);
24031 modesize = GEN_INT (GET_MODE_SIZE (mode));
24032 gcc_assert (GET_MODE_SIZE (mode) <= size);
/* First pass: store/copy SIZE bytes from the start of the block.  */
24033 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24036 emit_move_insn (destmem, gen_lowpart (mode, value));
24039 emit_move_insn (destmem, srcmem);
24040 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24042 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
/* Second pass: re-aim at DESTPTR + COUNT - SIZE (may overlap the first
   pass) so the tail of the block is covered exactly.  */
24045 destmem = offset_address (destmem, count, 1);
24046 destmem = offset_address (destmem, GEN_INT (-2 * size),
24047 GET_MODE_SIZE (mode));
24050 srcmem = offset_address (srcmem, count, 1);
24051 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
24052 GET_MODE_SIZE (mode));
24054 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24057 emit_move_insn (destmem, gen_lowpart (mode, value));
24060 emit_move_insn (destmem, srcmem);
24061 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24063 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24065 emit_jump_insn (gen_jump (done_label));
24068 emit_label (label);
24069 LABEL_NUSES (label) = 1;
24072 /* Handle small memcpy (up to SIZE that is supposed to be small power of 2.
24073 and get ready for the main memcpy loop by copying iniital DESIRED_ALIGN-ALIGN
24074 bytes and last SIZE bytes adjusitng DESTPTR/SRCPTR/COUNT in a way we can
24075 proceed with an loop copying SIZE bytes at once. Do moves in MODE.
24076 DONE_LABEL is a label after the whole copying sequence. The label is created
24077 on demand if *DONE_LABEL is NULL.
24078 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
24079 bounds after the initial copies.
24081 DESTMEM/SRCMEM are memory expressions pointing to the copies block,
24082 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether
24083 we will dispatch to a library call for large blocks.
24085 In pseudocode we do:
24089 Assume that SIZE is 4. Bigger sizes are handled analogously
24092 copy 4 bytes from SRCPTR to DESTPTR
24093 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
24098 copy 1 byte from SRCPTR to DESTPTR
24101 copy 2 bytes from SRCPTR to DESTPTR
24102 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
24107 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
24108 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
24110 OLD_DESPTR = DESTPTR;
24111 Align DESTPTR up to DESIRED_ALIGN
24112 SRCPTR += DESTPTR - OLD_DESTPTR
24113 COUNT -= DEST_PTR - OLD_DESTPTR
24115 Round COUNT down to multiple of SIZE
24116 << optional caller supplied zero size guard is here >>
24117 << optional caller suppplied dynamic check is here >>
24118 << caller supplied main copy loop is here >>
24123 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24124 rtx *destptr, rtx *srcptr,
24126 rtx value, rtx vec_value,
24128 rtx_code_label **done_label,
24132 unsigned HOST_WIDE_INT *min_size,
24133 bool dynamic_check,
24136 rtx_code_label *loop_label = NULL, *label;
24139 int prolog_size = 0;
24142 /* Chose proper value to copy. */
24143 if (issetmem && VECTOR_MODE_P (mode))
24144 mode_value = vec_value;
24146 mode_value = value;
24147 gcc_assert (GET_MODE_SIZE (mode) <= size);
24149 /* See if block is big or small, handle small blocks. */
24150 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24153 loop_label = gen_label_rtx ();
24156 *done_label = gen_label_rtx ();
24158 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24162 /* Handle sizes > 3. */
24163 for (;size2 > 2; size2 >>= 1)
24164 expand_small_movmem_or_setmem (destmem, srcmem,
24168 size2, *done_label, issetmem);
24169 /* Nothing to copy? Jump to DONE_LABEL if so */
24170 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24173 /* Do a byte copy. */
24174 destmem = change_address (destmem, QImode, *destptr);
24176 emit_move_insn (destmem, gen_lowpart (QImode, value));
24179 srcmem = change_address (srcmem, QImode, *srcptr);
24180 emit_move_insn (destmem, srcmem);
24183 /* Handle sizes 2 and 3. */
24184 label = ix86_expand_aligntest (*count, 2, false);
24185 destmem = change_address (destmem, HImode, *destptr);
24186 destmem = offset_address (destmem, *count, 1);
24187 destmem = offset_address (destmem, GEN_INT (-2), 2);
24189 emit_move_insn (destmem, gen_lowpart (HImode, value));
24192 srcmem = change_address (srcmem, HImode, *srcptr);
24193 srcmem = offset_address (srcmem, *count, 1);
24194 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24195 emit_move_insn (destmem, srcmem);
24198 emit_label (label);
24199 LABEL_NUSES (label) = 1;
24200 emit_jump_insn (gen_jump (*done_label));
24204 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24205 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24207 /* Start memcpy for COUNT >= SIZE. */
24210 emit_label (loop_label);
24211 LABEL_NUSES (loop_label) = 1;
24214 /* Copy first desired_align bytes. */
24216 srcmem = change_address (srcmem, mode, *srcptr);
24217 destmem = change_address (destmem, mode, *destptr);
24218 modesize = GEN_INT (GET_MODE_SIZE (mode));
24219 for (n = 0; prolog_size < desired_align - align; n++)
24222 emit_move_insn (destmem, mode_value);
24225 emit_move_insn (destmem, srcmem);
24226 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24228 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24229 prolog_size += GET_MODE_SIZE (mode);
24233 /* Copy last SIZE bytes. */
24234 destmem = offset_address (destmem, *count, 1);
24235 destmem = offset_address (destmem,
24236 GEN_INT (-size - prolog_size),
24239 emit_move_insn (destmem, mode_value);
24242 srcmem = offset_address (srcmem, *count, 1);
24243 srcmem = offset_address (srcmem,
24244 GEN_INT (-size - prolog_size),
24246 emit_move_insn (destmem, srcmem);
24248 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24250 destmem = offset_address (destmem, modesize, 1);
24252 emit_move_insn (destmem, mode_value);
24255 srcmem = offset_address (srcmem, modesize, 1);
24256 emit_move_insn (destmem, srcmem);
24260 /* Align destination. */
24261 if (desired_align > 1 && desired_align > align)
24263 rtx saveddest = *destptr;
24265 gcc_assert (desired_align <= size);
24266 /* Align destptr up, place it to new register. */
24267 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24268 GEN_INT (prolog_size),
24269 NULL_RTX, 1, OPTAB_DIRECT);
24270 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24271 GEN_INT (-desired_align),
24272 *destptr, 1, OPTAB_DIRECT);
24273 /* See how many bytes we skipped. */
24274 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24276 saveddest, 1, OPTAB_DIRECT);
24277 /* Adjust srcptr and count. */
24279 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
24280 *srcptr, 1, OPTAB_DIRECT);
24281 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24282 saveddest, *count, 1, OPTAB_DIRECT);
24283 /* We copied at most size + prolog_size. */
24284 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24285 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24289 /* Our loops always round down the bock size, but for dispatch to library
24290 we need precise value. */
24292 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
24293 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24297 gcc_assert (prolog_size == 0);
24298 /* Decrease count, so we won't end up copying last word twice. */
24299 if (!CONST_INT_P (*count))
24300 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24301 constm1_rtx, *count, 1, OPTAB_DIRECT);
24303 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24305 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
24310 /* This function is like the previous one, except here we know how many bytes
24311 need to be copied. That allows us to update alignment not only of DST, which
24312 is returned, but also of SRC, which is passed as a pointer for that
24315 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24316 rtx srcreg, rtx value, rtx vec_value,
24317 int desired_align, int align_bytes,
24321 rtx orig_dst = dst;
24322 rtx orig_src = NULL;
24323 int piece_size = 1;
24324 int copied_bytes = 0;
24328 gcc_assert (srcp != NULL);
24333 /* Emit one store/copy per set bit of ALIGN_BYTES, smallest piece first,
   so exactly ALIGN_BYTES bytes are handled and DST becomes aligned.  */
24334 for (piece_size = 1;
24334 piece_size <= desired_align && copied_bytes < align_bytes;
24337 if (align_bytes & piece_size)
24341 /* Prefer the wide vector value when the piece is wider than the
   scalar promoted VALUE.  */
24341 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24342 dst = emit_memset (dst, destreg, vec_value, piece_size);
24344 dst = emit_memset (dst, destreg, value, piece_size);
24347 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24348 copied_bytes += piece_size;
24351 /* Record the new (higher) destination alignment and shrunken size.  */
24351 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24352 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24353 if (MEM_SIZE_KNOWN_P (orig_dst))
24354 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24358 /* Derive what alignment the source gained from copying ALIGN_BYTES,
   when the relative misalignment of SRC and DST is known.  */
24358 int src_align_bytes = get_mem_align_offset (src, desired_align
24360 if (src_align_bytes >= 0)
24361 src_align_bytes = desired_align - src_align_bytes;
24362 if (src_align_bytes >= 0)
24364 unsigned int src_align;
24365 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24367 if ((src_align_bytes & (src_align - 1))
24368 == (align_bytes & (src_align - 1)))
24371 if (src_align > (unsigned int) desired_align)
24372 src_align = desired_align;
24373 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24374 set_mem_align (src, src_align * BITS_PER_UNIT);
24376 if (MEM_SIZE_KNOWN_P (orig_src))
24377 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24384 /* Return true if ALG can be used in current context.
24385 Assume we expand memset if MEMSET is true. */
24387 alg_usable_p (enum stringop_alg alg, bool memset)
24389 /* no_stringop is never a usable choice.  */
24389 if (alg == no_stringop)
24391 /* The vector loop needs SSE or AVX moves to exist.  */
24391 if (alg == vector_loop)
24392 return TARGET_SSE || TARGET_AVX;
24393 /* Algorithms using the rep prefix want at least edi and ecx;
24394 additionally, memset wants eax and memcpy wants esi. Don't
24395 consider such algorithms if the user has appropriated those
24396 registers for their own purposes. */
24397 if (alg == rep_prefix_1_byte
24398 || alg == rep_prefix_4_byte
24399 || alg == rep_prefix_8_byte)
24400 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24401 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24405 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.
   MIN_SIZE/MAX_SIZE bound the possible block size; MEMSET and ZERO_MEMSET
   describe the operation.  On return *DYNAMIC_CHECK is -1 or the size
   threshold above which a libcall should be used at runtime, and *NOALIGN
   says whether the chosen algorithm skips the alignment prologue.  */
24406 static enum stringop_alg
24407 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24408 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24409 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24411 const struct stringop_algs * algs;
24412 bool optimize_for_speed;
24414 const struct processor_costs *cost;
24416 bool any_alg_usable_p = false;
24419 *dynamic_check = -1;
24421 /* Even if the string operation call is cold, we still might spend a lot
24422 of time processing large blocks. */
24423 if (optimize_function_for_size_p (cfun)
24424 || (optimize_insn_for_size_p ()
24426 || (expected_size != -1 && expected_size < 256))))
24427 optimize_for_speed = false;
24429 optimize_for_speed = true;
24431 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24433 algs = &cost->memset[TARGET_64BIT != 0];
24435 algs = &cost->memcpy[TARGET_64BIT != 0];
24437 /* See maximal size for user defined algorithm.  Also note whether any
   algorithm is usable at all under the current fixed-register set.  */
24438 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24440 enum stringop_alg candidate = algs->size[i].alg;
24441 bool usable = alg_usable_p (candidate, memset);
24442 any_alg_usable_p |= usable;
24444 if (candidate != libcall && candidate && usable)
24445 max = algs->size[i].max;
24448 /* If expected size is not known but max size is small enough
24449 so inline version is a win, set expected size into
24451 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24452 && expected_size == -1)
24453 expected_size = min_size / 2 + max_size / 2;
24455 /* If user specified the algorithm, honor it if possible. */
24456 if (ix86_stringop_alg != no_stringop
24457 && alg_usable_p (ix86_stringop_alg, memset))
24458 return ix86_stringop_alg;
24459 /* rep; movq or rep; movl is the smallest variant. */
24460 else if (!optimize_for_speed)
24463 /* Byte variant when count is unknown, not a multiple of 4, or the
   memset value is non-zero (cannot widen the store cheaply).  */
24463 if (!count || (count & 3) || (memset && !zero_memset))
24464 return alg_usable_p (rep_prefix_1_byte, memset)
24465 ? rep_prefix_1_byte : loop_1_byte;
24467 return alg_usable_p (rep_prefix_4_byte, memset)
24468 ? rep_prefix_4_byte : loop;
24470 /* Very tiny blocks are best handled via the loop, REP is expensive to
24472 else if (expected_size != -1 && expected_size < 4)
24473 return loop_1_byte;
24474 else if (expected_size != -1)
24476 enum stringop_alg alg = libcall;
24477 bool alg_noalign = false;
24478 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24480 /* We get here if the algorithms that were not libcall-based
24481 were rep-prefix based and we are unable to use rep prefixes
24482 based on global register usage. Break out of the loop and
24483 use the heuristic below. */
24484 if (algs->size[i].max == 0)
24486 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24488 enum stringop_alg candidate = algs->size[i].alg;
24490 if (candidate != libcall && alg_usable_p (candidate, memset))
24493 alg_noalign = algs->size[i].noalign;
24495 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24496 last non-libcall inline algorithm. */
24497 if (TARGET_INLINE_ALL_STRINGOPS)
24499 /* When the current size is best to be copied by a libcall,
24500 but we are still forced to inline, run the heuristic below
24501 that will pick code for medium sized blocks. */
24502 if (alg != libcall)
24504 *noalign = alg_noalign;
24507 else if (!any_alg_usable_p)
24510 else if (alg_usable_p (candidate, memset))
24512 *noalign = algs->size[i].noalign;
24518 /* When asked to inline the call anyway, try to pick meaningful choice.
24519 We look for maximal size of block that is faster to copy by hand and
24520 take blocks of at most of that size guessing that average size will
24521 be roughly half of the block.
24523 If this turns out to be bad, we might simply specify the preferred
24524 choice in ix86_costs. */
24525 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24526 && (algs->unknown_size == libcall
24527 || !alg_usable_p (algs->unknown_size, memset)))
24529 enum stringop_alg alg;
24531 /* If there aren't any usable algorithms, then recursing on
24532 smaller sizes isn't going to find anything. Just return the
24533 simple byte-at-a-time copy loop. */
24534 if (!any_alg_usable_p)
24536 /* Pick something reasonable. */
24537 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24538 *dynamic_check = 128;
24539 return loop_1_byte;
24543 /* Recurse with expected size max/2 to choose the inline algorithm
   for the below-threshold case.  */
24543 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24544 zero_memset, dynamic_check, noalign);
24545 gcc_assert (*dynamic_check == -1);
24546 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24547 *dynamic_check = max;
24549 gcc_assert (alg != libcall);
24552 return (alg_usable_p (algs->unknown_size, memset)
24553 ? algs->unknown_size : libcall);
24556 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24557 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).
   Returns the alignment the prologue should establish, based on the chosen
   algorithm ALG and its MOVE_MODE.  */
24559 decide_alignment (int align,
24560 enum stringop_alg alg,
24562 machine_mode move_mode)
24564 int desired_align = 0;
24566 gcc_assert (alg != no_stringop);
24568 if (alg == libcall)
24570 if (move_mode == VOIDmode)
24573 desired_align = GET_MODE_SIZE (move_mode);
24574 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
24575 copying whole cacheline at once. */
24576 if (TARGET_PENTIUMPRO
24577 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24582 /* Never ask for less alignment than we already have, and don't bother
   aligning blocks expected to be tiny.  */
24582 if (desired_align < align)
24583 desired_align = align;
24584 if (expected_size != -1 && expected_size < 4)
24585 desired_align = align;
24587 return desired_align;
24591 /* Helper function for memcpy. For QImode value 0xXY produce
24592 0xXYXYXYXY of width specified by MODE. This is essentially
24593 a * 0x10101010, but we can do slightly better than
24594 synth_mult by unwinding the sequence by hand on CPUs with
   fast multiply.  (Used by the memset expansion below to splat
   the fill byte across a word.)  */
24597 promote_duplicated_reg (machine_mode mode, rtx val)
24599 machine_mode valmode = GET_MODE (val);
24601 int nops = mode == DImode ? 3 : 2;
24603 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24604 if (val == const0_rtx)
24605 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24606 /* Constant byte: compute the replicated pattern at compile time.  */
24606 if (CONST_INT_P (val))
24608 HOST_WIDE_INT v = INTVAL (val) & 255;
24612 if (mode == DImode)
24613 v |= (v << 16) << 16;
24614 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24617 if (valmode == VOIDmode)
24619 if (valmode != QImode)
24620 val = gen_lowpart (QImode, val);
24621 if (mode == QImode)
24623 if (!TARGET_PARTIAL_REG_STALL)
24625 /* Use a multiply by the replicated-ones constant when the cost
   tables say it beats the shift/or sequence below.  */
24625 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24626 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24627 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24628 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24630 rtx reg = convert_modes (mode, QImode, val, true);
24631 tmp = promote_duplicated_reg (mode, const1_rtx);
24632 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24637 rtx reg = convert_modes (mode, QImode, val, true);
24639 /* Duplicate byte into low 16 bits via an insv pattern, then widen
   by shift-and-or doubling.  */
24639 if (!TARGET_PARTIAL_REG_STALL)
24640 if (mode == SImode)
24641 emit_insn (gen_movsi_insv_1 (reg, reg));
24643 emit_insn (gen_movdi_insv_1 (reg, reg));
24646 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24647 NULL, 1, OPTAB_DIRECT);
24649 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24651 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24652 NULL, 1, OPTAB_DIRECT);
24653 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24654 if (mode == SImode)
24656 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24657 NULL, 1, OPTAB_DIRECT);
24658 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24663 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
24664 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
24665 alignment from ALIGN to DESIRED_ALIGN.  Picks the narrowest integer mode
   wide enough for both the main loop chunk and the alignment prologue.  */
24667 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24673 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24674 promoted_val = promote_duplicated_reg (DImode, val);
24675 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24676 promoted_val = promote_duplicated_reg (SImode, val);
24677 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24678 promoted_val = promote_duplicated_reg (HImode, val);
24680 promoted_val = val;
24682 return promoted_val;
24685 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24686 operations when profitable. The code depends upon architecture, block size
24687 and alignment, but always has one of the following overall structures:
24689 Aligned move sequence:
24691 1) Prologue guard: Conditional that jumps up to epilogues for small
24692 blocks that can be handled by epilogue alone. This is faster
24693 but also needed for correctness, since prologue assume the block
24694 is larger than the desired alignment.
24696 Optional dynamic check for size and libcall for large
24697 blocks is emitted here too, with -minline-stringops-dynamically.
24699 2) Prologue: copy first few bytes in order to get destination
24700 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24701 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24702 copied. We emit either a jump tree on power of two sized
24703 blocks, or a byte loop.
24705 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24706 with specified algorithm.
24708 4) Epilogue: code copying tail of the block that is too small to be
24709 handled by main body (or up to size guarded by prologue guard).
24711 Misaligned move sequence
24713 1) misaligned move prologue/epilogue containing:
24714 a) Prologue handling small memory blocks and jumping to done_label
24715 (skipped if blocks are known to be large enough)
24716 b) Single move copying first DESIRED_ALIGN-ALIGN bytes if alignment is
24717 needed by single possibly misaligned move
24718 (skipped if alignment is not needed)
24719 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24721 2) Zero size guard dispatching to done_label, if needed
24723 3) dispatch to library call, if needed,
24725 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24726 with specified algorithm. */
24728 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24729 rtx align_exp, rtx expected_align_exp,
24730 rtx expected_size_exp, rtx min_size_exp,
24731 rtx max_size_exp, rtx probable_max_size_exp,
24736 rtx_code_label *label = NULL;
24738 rtx_code_label *jump_around_label = NULL;
24739 HOST_WIDE_INT align = 1;
24740 unsigned HOST_WIDE_INT count = 0;
24741 HOST_WIDE_INT expected_size = -1;
24742 int size_needed = 0, epilogue_size_needed;
24743 int desired_align = 0, align_bytes = 0;
24744 enum stringop_alg alg;
24745 rtx promoted_val = NULL;
24746 rtx vec_promoted_val = NULL;
24747 bool force_loopy_epilogue = false;
24749 bool need_zero_guard = false;
24751 machine_mode move_mode = VOIDmode;
24752 int unroll_factor = 1;
24753 /* TODO: Once value ranges are available, fill in proper data. */
24754 unsigned HOST_WIDE_INT min_size = 0;
24755 unsigned HOST_WIDE_INT max_size = -1;
24756 unsigned HOST_WIDE_INT probable_max_size = -1;
24757 bool misaligned_prologue_used = false;
24759 if (CONST_INT_P (align_exp))
24760 align = INTVAL (align_exp);
24761 /* i386 can do misaligned access on reasonably increased cost. */
24762 if (CONST_INT_P (expected_align_exp)
24763 && INTVAL (expected_align_exp) > align)
24764 align = INTVAL (expected_align_exp);
24765 /* ALIGN is the minimum of destination and source alignment, but we care here
24766 just about destination alignment. */
24768 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24769 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24771 if (CONST_INT_P (count_exp))
24773 min_size = max_size = probable_max_size = count = expected_size
24774 = INTVAL (count_exp);
24775 /* When COUNT is 0, there is nothing to do. */
24782 min_size = INTVAL (min_size_exp);
24784 max_size = INTVAL (max_size_exp);
24785 if (probable_max_size_exp)
24786 probable_max_size = INTVAL (probable_max_size_exp);
24787 if (CONST_INT_P (expected_size_exp))
24788 expected_size = INTVAL (expected_size_exp);
24791 /* Make sure we don't need to care about overflow later on. */
24792 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24795 /* Step 0: Decide on preferred algorithm, desired alignment and
24796 size of chunks to be copied by main loop. */
24797 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24799 issetmem && val_exp == const0_rtx,
24800 &dynamic_check, &noalign);
24801 if (alg == libcall)
24803 gcc_assert (alg != no_stringop);
24805 /* For now vector-version of memset is generated only for memory zeroing, as
24806 creating of promoted vector value is very cheap in this case. */
24807 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24808 alg = unrolled_loop;
24811 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24812 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24814 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24817 move_mode = word_mode;
24823 gcc_unreachable ();
24825 need_zero_guard = true;
24826 move_mode = QImode;
24829 need_zero_guard = true;
24831 case unrolled_loop:
24832 need_zero_guard = true;
24833 unroll_factor = (TARGET_64BIT ? 4 : 2);
24836 need_zero_guard = true;
24838 /* Find the widest supported mode. */
24839 move_mode = word_mode;
24840 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24841 != CODE_FOR_nothing)
24842 move_mode = GET_MODE_WIDER_MODE (move_mode);
24844 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24845 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24846 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24848 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24849 move_mode = mode_for_vector (word_mode, nunits);
24850 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24851 move_mode = word_mode;
24853 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24855 case rep_prefix_8_byte:
24856 move_mode = DImode;
24858 case rep_prefix_4_byte:
24859 move_mode = SImode;
24861 case rep_prefix_1_byte:
24862 move_mode = QImode;
24865 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24866 epilogue_size_needed = size_needed;
24868 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24869 if (!TARGET_ALIGN_STRINGOPS || noalign)
24870 align = desired_align;
24872 /* Step 1: Prologue guard. */
24874 /* Alignment code needs count to be in register. */
24875 if (CONST_INT_P (count_exp) && desired_align > align)
24877 if (INTVAL (count_exp) > desired_align
24878 && INTVAL (count_exp) > size_needed)
24881 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24882 if (align_bytes <= 0)
24885 align_bytes = desired_align - align_bytes;
24887 if (align_bytes == 0)
24888 count_exp = force_reg (counter_mode (count_exp), count_exp);
24890 gcc_assert (desired_align >= 1 && align >= 1);
24892 /* Misaligned move sequences handle both prologue and epilogue at once.
24893 Default code generation results in a smaller code for large alignments
24894 and also avoids redundant job when sizes are known precisely. */
24895 misaligned_prologue_used
24896 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24897 && MAX (desired_align, epilogue_size_needed) <= 32
24898 && desired_align <= epilogue_size_needed
24899 && ((desired_align > align && !align_bytes)
24900 || (!count && epilogue_size_needed > 1)));
24902 /* Do the cheap promotion to allow better CSE across the
24903 main loop and epilogue (ie one load of the big constant in the
24905 For now the misaligned move sequences do not have fast path
24906 without broadcasting. */
24907 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24909 if (alg == vector_loop)
24911 gcc_assert (val_exp == const0_rtx);
24912 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24913 promoted_val = promote_duplicated_reg_to_size (val_exp,
24914 GET_MODE_SIZE (word_mode),
24915 desired_align, align);
24919 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24920 desired_align, align);
24923 /* Misaligned move sequences handles both prologues and epilogues at once.
24924 Default code generation results in smaller code for large alignments and
24925 also avoids redundant job when sizes are known precisely. */
24926 if (misaligned_prologue_used)
24928 /* Misaligned move prologue handled small blocks by itself. */
24929 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24930 (dst, src, &destreg, &srcreg,
24931 move_mode, promoted_val, vec_promoted_val,
24933 &jump_around_label,
24934 desired_align < align
24935 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24936 desired_align, align, &min_size, dynamic_check, issetmem);
24938 src = change_address (src, BLKmode, srcreg);
24939 dst = change_address (dst, BLKmode, destreg);
24940 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24941 epilogue_size_needed = 0;
24942 if (need_zero_guard && !min_size)
24944 /* It is possible that we copied enough so the main loop will not
24946 gcc_assert (size_needed > 1);
24947 if (jump_around_label == NULL_RTX)
24948 jump_around_label = gen_label_rtx ();
24949 emit_cmp_and_jump_insns (count_exp,
24950 GEN_INT (size_needed),
24951 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24952 if (expected_size == -1
24953 || expected_size < (desired_align - align) / 2 + size_needed)
24954 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24956 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24959 /* Ensure that alignment prologue won't copy past end of block. */
24960 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24962 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24963 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24964 Make sure it is power of 2. */
24965 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24967 /* To improve performance of small blocks, we jump around the VAL
24968 promoting mode. This mean that if the promoted VAL is not constant,
24969 we might not use it in the epilogue and have to use byte
24971 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24972 force_loopy_epilogue = true;
24973 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24974 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24976 /* If main algorithm works on QImode, no epilogue is needed.
24977 For small sizes just don't align anything. */
24978 if (size_needed == 1)
24979 desired_align = align;
24984 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24986 label = gen_label_rtx ();
24987 emit_cmp_and_jump_insns (count_exp,
24988 GEN_INT (epilogue_size_needed),
24989 LTU, 0, counter_mode (count_exp), 1, label);
24990 if (expected_size == -1 || expected_size < epilogue_size_needed)
24991 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24993 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24997 /* Emit code to decide on runtime whether library call or inline should be
24999 if (dynamic_check != -1)
25001 if (!issetmem && CONST_INT_P (count_exp))
25003 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
25005 emit_block_move_via_libcall (dst, src, count_exp, false);
25006 count_exp = const0_rtx;
25012 rtx_code_label *hot_label = gen_label_rtx ();
25013 if (jump_around_label == NULL_RTX)
25014 jump_around_label = gen_label_rtx ();
25015 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
25016 LEU, 0, counter_mode (count_exp),
25018 predict_jump (REG_BR_PROB_BASE * 90 / 100);
25020 set_storage_via_libcall (dst, count_exp, val_exp, false);
25022 emit_block_move_via_libcall (dst, src, count_exp, false);
25023 emit_jump (jump_around_label);
25024 emit_label (hot_label);
25028 /* Step 2: Alignment prologue. */
25029 /* Do the expensive promotion once we branched off the small blocks. */
25030 if (issetmem && !promoted_val)
25031 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25032 desired_align, align);
25034 if (desired_align > align && !misaligned_prologue_used)
25036 if (align_bytes == 0)
25038 /* Except for the first move in prologue, we no longer know
25039 constant offset in aliasing info. It doesn't seem worth
25040 the pain to maintain it for the first move, so throw away
25042 dst = change_address (dst, BLKmode, destreg);
25044 src = change_address (src, BLKmode, srcreg);
25045 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
25046 promoted_val, vec_promoted_val,
25047 count_exp, align, desired_align,
25049 /* At most desired_align - align bytes are copied. */
25050 if (min_size < (unsigned)(desired_align - align))
25053 min_size -= desired_align - align;
25057 /* If we know how many bytes need to be stored before dst is
25058 sufficiently aligned, maintain aliasing info accurately. */
25059 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
25067 count_exp = plus_constant (counter_mode (count_exp),
25068 count_exp, -align_bytes);
25069 count -= align_bytes;
25070 min_size -= align_bytes;
25071 max_size -= align_bytes;
25073 if (need_zero_guard
25075 && (count < (unsigned HOST_WIDE_INT) size_needed
25076 || (align_bytes == 0
25077 && count < ((unsigned HOST_WIDE_INT) size_needed
25078 + desired_align - align))))
25080 /* It is possible that we copied enough so the main loop will not
25082 gcc_assert (size_needed > 1);
25083 if (label == NULL_RTX)
25084 label = gen_label_rtx ();
25085 emit_cmp_and_jump_insns (count_exp,
25086 GEN_INT (size_needed),
25087 LTU, 0, counter_mode (count_exp), 1, label);
25088 if (expected_size == -1
25089 || expected_size < (desired_align - align) / 2 + size_needed)
25090 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25092 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25095 if (label && size_needed == 1)
25097 emit_label (label);
25098 LABEL_NUSES (label) = 1;
25100 epilogue_size_needed = 1;
25102 promoted_val = val_exp;
25104 else if (label == NULL_RTX && !misaligned_prologue_used)
25105 epilogue_size_needed = size_needed;
25107 /* Step 3: Main loop. */
25114 gcc_unreachable ();
25117 case unrolled_loop:
25118 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25119 count_exp, move_mode, unroll_factor,
25120 expected_size, issetmem);
25123 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25124 vec_promoted_val, count_exp, move_mode,
25125 unroll_factor, expected_size, issetmem);
25127 case rep_prefix_8_byte:
25128 case rep_prefix_4_byte:
25129 case rep_prefix_1_byte:
25130 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25131 val_exp, count_exp, move_mode, issetmem);
25134 /* Adjust properly the offset of src and dest memory for aliasing. */
25135 if (CONST_INT_P (count_exp))
25138 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25139 (count / size_needed) * size_needed);
25140 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25141 (count / size_needed) * size_needed);
25146 src = change_address (src, BLKmode, srcreg);
25147 dst = change_address (dst, BLKmode, destreg);
25150 /* Step 4: Epilogue to copy the remaining bytes. */
25154 /* When the main loop is done, COUNT_EXP might hold original count,
25155 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
25156 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
25157 bytes. Compensate if needed. */
25159 if (size_needed < epilogue_size_needed)
25162 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25163 GEN_INT (size_needed - 1), count_exp, 1,
25165 if (tmp != count_exp)
25166 emit_move_insn (count_exp, tmp);
25168 emit_label (label);
25169 LABEL_NUSES (label) = 1;
25172 if (count_exp != const0_rtx && epilogue_size_needed > 1)
25174 if (force_loopy_epilogue)
25175 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25176 epilogue_size_needed);
25180 expand_setmem_epilogue (dst, destreg, promoted_val,
25181 vec_promoted_val, count_exp,
25182 epilogue_size_needed);
25184 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25185 epilogue_size_needed);
25188 if (jump_around_label)
25189 emit_label (jump_around_label);
/* NOTE(review): this is a numbered extract with elided lines; only comments
   were added below.  Emits the RTL body of the unrolled strlen: first a
   byte-by-byte prologue that advances OUT to a 4-byte boundary, then a loop
   scanning one SImode word per iteration, then a branch-free fixup that
   backs OUT up onto the exact NUL byte.  */
25194 /* Expand the appropriate insns for doing strlen if not just doing
25197 out = result, initialized with the start address
25198 align_rtx = alignment of the address.
25199 scratch = scratch register, initialized with the startaddress when
25200 not aligned, otherwise undefined
25202 This is just the body. It needs the initializations mentioned above and
25203 some address computing at the end. These things are done in i386.md. */
25206 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25210 rtx_code_label *align_2_label = NULL;
25211 rtx_code_label *align_3_label = NULL;
25212 rtx_code_label *align_4_label = gen_label_rtx ();
25213 rtx_code_label *end_0_label = gen_label_rtx ();
25215 rtx tmpreg = gen_reg_rtx (SImode);
25216 rtx scratch = gen_reg_rtx (SImode);
25220 if (CONST_INT_P (align_rtx))
25221 align = INTVAL (align_rtx);
25223 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
25225 /* Is there a known alignment and is it less than 4? */
25228 rtx scratch1 = gen_reg_rtx (Pmode);
25229 emit_move_insn (scratch1, out);
25230 /* Is there a known alignment and is it not 2? */
25233 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
25234 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
25236 /* Leave just the 3 lower bits. */
25237 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25238 NULL_RTX, 0, OPTAB_WIDEN)mi;
25240 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25241 Pmode, 1, align_4_label);
25242 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25243 Pmode, 1, align_2_label);
25244 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25245 Pmode, 1, align_3_label);
25249 /* Since the alignment is 2, we have to check 2 or 0 bytes;
25250 check if is aligned to 4 - byte. */
25252 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25253 NULL_RTX, 0, OPTAB_WIDEN);
25255 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25256 Pmode, 1, align_4_label);
25259 mem = change_address (src, QImode, out);
25261 /* Now compare the bytes. */
25263 /* Compare the first n unaligned byte on a byte per byte basis. */
25264 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25265 QImode, 1, end_0_label);
25267 /* Increment the address. */
25268 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25270 /* Not needed with an alignment of 2 */
25273 emit_label (align_2_label);
25275 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25278 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25280 emit_label (align_3_label);
25283 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25286 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25289 /* Generate loop to check 4 bytes at a time. It is not a good idea to
25290 align this loop. It gives only huge programs, but does not help to
25292 emit_label (align_4_label);
25294 mem = change_address (src, SImode, out);
25295 emit_move_insn (scratch, mem);
25296 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
/* Classic zero-in-word test: (x - 0x01010101) & ~x & 0x80808080 is nonzero
   iff some byte of x is zero.  */
25298 /* This formula yields a nonzero result iff one of the bytes is zero.
25299 This saves three branches inside loop and many cycles. */
25301 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25302 emit_insn (gen_one_cmplsi2 (scratch, scratch));
25303 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25304 emit_insn (gen_andsi3 (tmpreg, tmpreg,
25305 gen_int_mode (0x80808080, SImode)));
25306 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* This arm uses cmov (IF_THEN_ELSE on FLAGS) to locate the zero byte
   without branches; the other arm below uses an explicit jump.  */
25311 rtx reg = gen_reg_rtx (SImode);
25312 rtx reg2 = gen_reg_rtx (Pmode);
25313 emit_move_insn (reg, tmpreg);
25314 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25316 /* If zero is not in the first two bytes, move two bytes forward. */
25317 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25318 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25319 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25320 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
25321 gen_rtx_IF_THEN_ELSE (SImode, tmp,
25324 /* Emit lea manually to avoid clobbering of flags. */
25325 emit_insn (gen_rtx_SET (SImode, reg2,
25326 gen_rtx_PLUS (Pmode, out, const2_rtx)));
25328 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25329 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25330 emit_insn (gen_rtx_SET (VOIDmode, out,
25331 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
25337 rtx_code_label *end_2_label = gen_label_rtx ();
25338 /* Is zero in the first two bytes? */
25340 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25341 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25342 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25343 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25344 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25346 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
25347 JUMP_LABEL (tmp) = end_2_label;
25349 /* Not in the first two. Move two bytes forward. */
25350 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25351 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25353 emit_label (end_2_label);
/* addqi3_cc shifts the 0x80 marker bit into the carry flag; the
   carry-aware subtract then rewinds OUT onto the NUL byte directly.  */
25357 /* Avoid branch in fixing the byte. */
25358 tmpreg = gen_lowpart (QImode, tmpreg);
25359 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
25360 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25361 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25362 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
25364 emit_label (end_0_label);
/* NOTE(review): numbered extract with elided lines; comments added only.
   strlen expander entry point: either delegates to the unrolled SImode
   scanner (ix86_expand_strlensi_unroll_1) or emits a rep-scasb sequence
   via the strlenqi_1 pattern.  Returns the computed length in OUT.  */
25367 /* Expand strlen. */
25370 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25372 rtx addr, scratch1, scratch2, scratch3, scratch4;
25374 /* The generic case of strlen expander is long. Avoid it's
25375 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
25377 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25378 && !TARGET_INLINE_ALL_STRINGOPS
25379 && !optimize_insn_for_size_p ()
25380 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25383 addr = force_reg (Pmode, XEXP (src, 0));
25384 scratch1 = gen_reg_rtx (Pmode);
/* Unrolled path: only for NUL terminator, -O2+, and not size-optimized.  */
25386 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25387 && !optimize_insn_for_size_p ())
25389 /* Well it seems that some optimizer does not combine a call like
25390 foo(strlen(bar), strlen(bar));
25391 when the move and the subtraction is done here. It does calculate
25392 the length just once when these instructions are done inside of
25393 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
25394 often used and I use one fewer register for the lifetime of
25395 output_strlen_unroll() this is better. */
25397 emit_move_insn (out, addr);
25399 ix86_expand_strlensi_unroll_1 (out, src, align);
25401 /* strlensi_unroll_1 returns the address of the zero at the end of
25402 the string, like memchr(), so compute the length by subtracting
25403 the start address. */
25404 emit_insn (ix86_gen_sub3 (out, out, addr));
/* scasb path below needs eax/ecx/edi free (hard registers of the insn).  */
25410 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25411 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25414 scratch2 = gen_reg_rtx (Pmode);
25415 scratch3 = gen_reg_rtx (Pmode);
25416 scratch4 = force_reg (Pmode, constm1_rtx);
25418 emit_move_insn (scratch3, addr);
25419 eoschar = force_reg (QImode, eoschar);
25421 src = replace_equiv_address_nv (src, scratch3);
25423 /* If .md starts supporting :P, this can be done in .md. */
25424 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25425 scratch4), UNSPEC_SCAS);
25426 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* scas leaves -(len + 2) in the count; ~x + (-1) == -x - 2 recovers len.  */
25427 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25428 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
/* NOTE(review): numbered extract with elided lines; comments added only.
   Computes SYMBOL@PLTOFF + PIC register into a fresh pseudo; only valid
   in the 64-bit large PIC code model (asserted below).  */
25433 /* For given symbol (function) construct code to compute address of it's PLT
25434 entry in large x86-64 PIC model. */
25436 construct_plt_address (rtx symbol)
25440 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25441 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25442 gcc_assert (Pmode == DImode);
25444 tmp = gen_reg_rtx (Pmode);
25445 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25447 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25448 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
/* NOTE(review): numbered extract with elided lines; comments added only.
   Emits a CALL insn (possibly a sibcall) with all target-specific
   decoration: Mach-O/PIC indirection, large-PIC PLT addressing, the AL
   vararg SSE count, callee-pop adjustment, MPX bound-return registers,
   and the MS-ABI extra clobber list.  */
25453 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25455 rtx pop, bool sibcall)
25458 rtx use = NULL, call;
25459 unsigned int vec_len = 0;
25461 if (pop == const0_rtx)
25463 gcc_assert (!TARGET_64BIT || !pop);
25465 if (TARGET_MACHO && !TARGET_64BIT)
25468 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25469 fnaddr = machopic_indirect_call_target (fnaddr);
25474 /* Static functions and indirect calls don't need the pic register. */
25477 || (ix86_cmodel == CM_LARGE_PIC
25478 && DEFAULT_ABI != MS_ABI))
25479 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25480 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
25482 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25483 if (ix86_use_pseudo_pic_reg ())
25484 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25485 pic_offset_table_rtx)na;
25489 /* Skip setting up RAX register for -mskip-rax-setup when there are no
25490 parameters passed in vector registers. */
25492 && (INTVAL (callarg2) > 0
25493 || (INTVAL (callarg2) == 0
25494 && (TARGET_SSE || !flag_skip_rax_setup))))
/* AL carries the number of vector registers used by varargs (SysV ABI).  */
25496 rtx al = gen_rtx_REG (QImode, AX_REG);
25497 emit_move_insn (al, callarg2);
25498 use_reg (&use, al);
25501 if (ix86_cmodel == CM_LARGE_PIC
25504 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25505 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25506 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25508 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25509 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25511 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25512 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25515 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25519 /* We should add bounds as destination register in case
25520 pointer with bounds may be returned. */
25521 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25523 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25524 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25525 retval = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, retval, b0, b1));
25526 chkp_put_regs_to_expr_list (retval);
25529 call = gen_rtx_SET (VOIDmode, retval, call);
25531 vec[vec_len++] = call;
/* Callee-pop calls are a PARALLEL of the call and the sp adjustment.  */
25535 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25536 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
25537 vec[vec_len++] = pop;
25540 if (TARGET_64BIT_MS_ABI
25541 && (!callarg2 || INTVAL (callarg2) != -2))
25543 int const cregs_size
25544 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25547 for (i = 0; i < cregs_size; i++)
25549 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25550 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25552 clobber_reg (&use, gen_rtx_REG (mode, regno));
25557 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25558 call = emit_call_insn (call);
25560 CALL_INSN_FUNCTION_USAGE (call) = use;
/* NOTE(review): numbered extract with elided lines; comments added only.
   Picks the asm template for a call/sibcall (jmp for sibcalls, call
   otherwise; %P0 direct vs %A0 indirect) and, for SEH targets, decides
   whether a trailing nop is needed so the unwinder does not read past
   the call into the next function or an adjacent epilogue.  */
25565 /* Output the assembly for a call instruction. */
25568 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25570 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25571 bool seh_nop_p = false;
25574 if (SIBLING_CALL_P (insn))
25577 xasm = "%!jmp\t%P0";
25578 /* SEH epilogue detection requires the indirect branch case
25579 to include REX.W. */
25580 else if (TARGET_SEH)
25581 xasm = "%!rex.W jmp %A0";
25583 xasm = "%!jmp\t%A0";
25585 output_asm_insn (xasm, &call_op);
25589 /* SEH unwinding can require an extra nop to be emitted in several
25590 circumstances. Determine if we have one of those. */
25595 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25597 /* If we get to another real insn, we don't need the nop. */
25601 /* If we get to the epilogue note, prevent a catch region from
25602 being adjacent to the standard epilogue sequence. If non-
25603 call-exceptions, we'll have done this during epilogue emission. */
25604 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25605 && !flag_non_call_exceptions
25606 && !can_throw_internal (insn))
25613 /* If we didn't find a real insn following the call, prevent the
25614 unwinder from looking into the next function. */
25620 xasm = "%!call\t%P0";
25622 xasm = "%!call\t%A0";
25624 output_asm_insn (xasm, &call_op);
/* NOTE(review): numbered extract with elided lines; comments added only.
   Allocates and zero-initializes the per-function machine_function,
   seeding the lazily-computed prologue/epilogue register count (-1 =
   not yet computed) and the function's calling ABI.  */
25632 /* Clear stack slot assignments remembered from previous functions.
25633 This is called from INIT_EXPANDERS once before RTL is emitted for each
25636 static struct machine_function *
25637 ix86_init_machine_status (void)
25639 struct machine_function *f;
25641 f = ggc_cleared_alloc<machine_function> ();
25642 f->use_fast_prologue_epilogue_nregs = -1;
25643 f->call_abi = ix86_abi;
/* NOTE(review): numbered extract with elided lines; comments added only.
   Returns a (cached) MEM for stack slot N of MODE, allocating and
   pushing a new stack_local_entry onto ix86_stack_locals on a miss.
   copy_rtx before validize_mem keeps callers from sharing RTL.  */
25648 /* Return a MEM corresponding to a stack slot with mode MODE.
25649 Allocate a new slot if necessary.
25651 The RTL for a function can have several slots available: N is
25652 which slot to use. */
25655 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25657 struct stack_local_entry *s;
25659 gcc_assert (n < MAX_386_STACK_LOCALS);
25661 for (s = ix86_stack_locals; s; s = s->next)
25662 if (s->mode == mode && s->n == n)
25663 return validize_mem (copy_rtx (s->rtl));
25665 s = ggc_alloc<stack_local_entry> ();
25668 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25670 s->next = ix86_stack_locals;
25671 ix86_stack_locals = s;
25672 return validize_mem (copy_rtx (s->rtl));
/* NOTE(review): numbered extract; comments added only.
   Re-instantiates virtual registers in every cached stack-local slot
   (walks the ix86_stack_locals list built by assign_386_stack_local).  */
25676 ix86_instantiate_decls (void)
25678 struct stack_local_entry *s;
25680 for (s = ix86_stack_locals; s; s = s->next)
25681 if (s->rtl != NULL_RTX)
25682 instantiate_decl_rtl (s->rtl);
/* NOTE(review): numbered extract with elided lines; comments added only.
   True when a decomposed address (no base, no index, displacement only)
   is one of the symbolic forms that assemble to %rip-relative: a label,
   a non-TLS symbol, or a GOTPCREL/PCREL/GOTNTPOFF unspec, optionally
   wrapped in CONST plus a constant offset.  */
25685 /* Check whether x86 address PARTS is a pc-relative address. */
25688 rip_relative_addr_p (struct ix86_address *parts)
25690 rtx base, index, disp;
25692 base = parts->base;
25693 index = parts->index;
25694 disp = parts->disp;
25696 if (disp && !base && !index)
25702 if (GET_CODE (disp) == CONST)
25703 symbol = XEXP (disp, 0);
/* Strip a constant offset: (symbol + const_int) is still rip-relative.  */
25704 if (GET_CODE (symbol) == PLUS
25705 && CONST_INT_P (XEXP (symbol, 1)))
25706 symbol = XEXP (symbol, 0);
25708 if (GET_CODE (symbol) == LABEL_REF
25709 || (GET_CODE (symbol) == SYMBOL_REF
25710 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25711 || (GET_CODE (symbol) == UNSPEC
25712 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25713 || XINT (symbol, 1) == UNSPEC_PCREL
25714 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
/* NOTE(review): numbered extract with elided lines; comments added only.
   Computes the encoded byte length of ADDR: segment-override prefix,
   addr32 prefix (64-bit non-LEA with 32-bit base/index), SIB byte, and
   displacement size, per the x86 ModRM/SIB encoding rules listed in the
   comments below.  */
25721 /* Calculate the length of the memory address in the instruction encoding.
25722 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25723 or other prefixes. We never generate addr32 prefix for LEA insn. */
25726 memory_address_length (rtx addr, bool lea)
25728 struct ix86_address parts;
25729 rtx base, index, disp;
/* Auto-inc/dec addressing encodes as plain register indirect.  */
25733 if (GET_CODE (addr) == PRE_DEC
25734 || GET_CODE (addr) == POST_INC
25735 || GET_CODE (addr) == PRE_MODIFY
25736 || GET_CODE (addr) == POST_MODIFY)
25739 ok = ix86_decompose_address (addr, &parts);
25742 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25744 /* If this is not LEA instruction, add the length of addr32 prefix. */
25745 if (TARGET_64BIT && !lea
25746 && (SImode_address_operand (addr, VOIDmode)
25747 || (parts.base && GET_MODE (parts.base) == SImode)
25748 || (parts.index && GET_MODE (parts.index) == SImode)))
25752 index = parts.index;
25755 if (base && GET_CODE (base) == SUBREG)
25756 base = SUBREG_REG (base);
25757 if (index && GET_CODE (index) == SUBREG)
25758 index = SUBREG_REG (index);
25760 gcc_assert (base == NULL_RTX || REG_P (base));
25761 gcc_assert (index == NULL_RTX || REG_P (index));
25764 - esp as the base always wants an index,
25765 - ebp as the base always wants a displacement,
25766 - r12 as the base always wants an index,
25767 - r13 as the base always wants a displacement. */
25769 /* Register Indirect. */
25770 if (base && !index && !disp)
25772 /* esp (for its index) and ebp (for its displacement) need
25773 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25775 if (base == arg_pointer_rtx
25776 || base == frame_pointer_rtx
25777 || REGNO (base) == SP_REG
25778 || REGNO (base) == BP_REG
25779 || REGNO (base) == R12_REG
25780 || REGNO (base) == R13_REG)
25784 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25785 is not disp32, but disp32(%rip), so for disp32
25786 SIB byte is needed, unless print_operand_address
25787 optimizes it into disp32(%rip) or (%rip) is implied
25789 else if (disp && !base && !index)
25792 if (rip_relative_addr_p (&parts))
25797 /* Find the length of the displacement constant. */
/* Constraint K is a signed 8-bit constant, so disp8 vs disp32.  */
25800 if (base && satisfies_constraint_K (disp))
25805 /* ebp always wants a displacement. Similarly r13. */
25806 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25809 /* An index requires the two-byte modrm form.... */
25811 /* ...like esp (or r12), which always wants an index. */
25812 || base == arg_pointer_rtx
25813 || base == frame_pointer_rtx
25814 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
/* NOTE(review): numbered extract with elided lines; comments added only.
   Computes the "length_immediate" insn attribute: scans the insn's
   constant operand and returns the encoded immediate size, taking the
   8-bit short form into account when SHORTFORM is set.  */
25821 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25822 is set, expect that insn have 8bit immediate alternative. */
25824 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25828 extract_insn_cached (insn);
25829 for (i = recog_data.n_operands - 1; i >= 0; --i)
25830 if (CONSTANT_P (recog_data.operand[i]))
25832 enum attr_mode mode = get_attr_mode (insn);
25835 if (shortform && CONST_INT_P (recog_data.operand[i]))
25837 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
/* Truncate to the operand mode before the signed-8-bit range test,
   so e.g. 0xffff in HImode counts as -1.  */
25844 ival = trunc_int_for_mode (ival, HImode);
25847 ival = trunc_int_for_mode (ival, SImode);
25852 if (IN_RANGE (ival, -128, 127))
25869 /* Immediates for DImode instructions are encoded
25870 as 32bit sign extended values. */
25875 fatal_insn ("unknown insn mode", insn);
/* NOTE(review): numbered extract with elided lines; comments added only.
   Computes the "length_address" insn attribute: for LEA the length of
   the SET_SRC address; otherwise the length of the first MEM operand
   that belongs to the matched alternative (constraint 'X' operands are
   ignored).  */
25881 /* Compute default value for "length_address" attribute. */
25883 ix86_attr_length_address_default (rtx_insn *insn)
25887 if (get_attr_type (insn) == TYPE_LEA)
25889 rtx set = PATTERN (insn), addr;
25891 if (GET_CODE (set) == PARALLEL)
25892 set = XVECEXP (set, 0, 0);
25894 gcc_assert (GET_CODE (set) == SET);
25896 addr = SET_SRC (set);
25898 return memory_address_length (addr, true);
25901 extract_insn_cached (insn);
25902 for (i = recog_data.n_operands - 1; i >= 0; --i)
25903 if (MEM_P (recog_data.operand[i]))
25905 constrain_operands_cached (insn, reload_completed);
25906 if (which_alternative != -1)
/* Walk this operand's constraint string to the chosen alternative.  */
25908 const char *constraints = recog_data.constraints[i];
25909 int alt = which_alternative;
25911 while (*constraints == '=' || *constraints == '+')
25914 while (*constraints++ != ',')
25916 /* Skip ignored operands. */
25917 if (*constraints == 'X')
25920 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
/* NOTE(review): numbered extract with elided lines; comments added only.
   Computes the "length_vex" attribute (VEX prefix + opcode byte).
   The 2-byte VEX form is only usable with the 0f opcode map, no VEX.W,
   and no REX.W/REX.X/REX.B requirement from the operands.  */
25925 /* Compute default value for "length_vex" attribute. It includes
25926 2 or 3 byte VEX prefix and 1 opcode byte. */
25929 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
25934 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
25935 byte VEX prefix. */
25936 if (!has_0f_opcode || has_vex_w)
25939 /* We can always use 2 byte VEX prefix in 32bit. */
25943 extract_insn_cached (insn);
25945 for (i = recog_data.n_operands - 1; i >= 0; --i)
25946 if (REG_P (recog_data.operand[i]))
25948 /* REX.W bit uses 3 byte VEX prefix. */
25949 if (GET_MODE (recog_data.operand[i]) == DImode
25950 && GENERAL_REG_P (recog_data.operand[i]))
25955 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25956 if (MEM_P (recog_data.operand[i])
25957 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
/* NOTE(review): numbered extract with elided lines (the per-group return
   statements between the case labels are not visible here); comments
   added only.  Scheduler hook: max insns issued per cycle, grouped by
   microarchitecture family.  */
25964 /* Return the maximum number of instructions a cpu can issue. */
25967 ix86_issue_rate (void)
25971 case PROCESSOR_PENTIUM:
25972 case PROCESSOR_BONNELL:
25973 case PROCESSOR_SILVERMONT:
25974 case PROCESSOR_KNL:
25975 case PROCESSOR_INTEL:
25977 case PROCESSOR_BTVER2:
25978 case PROCESSOR_PENTIUM4:
25979 case PROCESSOR_NOCONA:
25982 case PROCESSOR_PENTIUMPRO:
25983 case PROCESSOR_ATHLON:
25985 case PROCESSOR_AMDFAM10:
25986 case PROCESSOR_GENERIC:
25987 case PROCESSOR_BTVER1:
25990 case PROCESSOR_BDVER1:
25991 case PROCESSOR_BDVER2:
25992 case PROCESSOR_BDVER3:
25993 case PROCESSOR_BDVER4:
25994 case PROCESSOR_CORE2:
25995 case PROCESSOR_NEHALEM:
25996 case PROCESSOR_SANDYBRIDGE:
25997 case PROCESSOR_HASWELL:
26005 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
26006 by DEP_INSN and nothing set by DEP_INSN. */
26009 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
26013 /* Simplify the test for uninteresting insns. */
26014 if (insn_type != TYPE_SETCC
26015 && insn_type != TYPE_ICMOV
26016 && insn_type != TYPE_FCMOV
26017 && insn_type != TYPE_IBR)
26020 if ((set = single_set (dep_insn)) != 0)
26022 set = SET_DEST (set);
26025 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
26026 && XVECLEN (PATTERN (dep_insn), 0) == 2
26027 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
26028 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
26030 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
26031 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
26036 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
26039 /* This test is true if the dependent insn reads the flags but
26040 not any other potentially set register. */
26041 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
26044 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
/* NOTE(review): numbered extract with elided lines; comments added only.
   Address Generation Interlock test: true iff USE_INSN's first MEM
   operand has an address modified by SET_INSN.  */
26050 /* Return true iff USE_INSN has a memory address with operands set by
26054 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
26057 extract_insn_cached (use_insn);
26058 for (i = recog_data.n_operands - 1; i >= 0; --i)
26059 if (MEM_P (recog_data.operand[i]))
26061 rtx addr = XEXP (recog_data.operand[i], 0);
26062 return modified_in_p (addr, set_insn) != 0;
/* NOTE(review): numbered extract with elided lines; comments added only.
   Recursive rtx walk: true if ADDR occurs (rtx_equal_p) anywhere inside
   INSN's expression tree, recursing through 'e' operands and 'E'/'V'
   vectors per GET_RTX_FORMAT.  */
26067 /* Helper function for exact_store_load_dependency.
26068 Return true if addr is found in insn. */
26070 exact_dependency_1 (rtx addr, rtx insn)
26072 enum rtx_code code;
26073 const char *format_ptr;
26076 code = GET_CODE (insn);
26080 if (rtx_equal_p (addr, insn))
26095 format_ptr = GET_RTX_FORMAT (code);
26096 for (i = 0; i < GET_RTX_LENGTH (code); i++)
26098 switch (*format_ptr++)
26101 if (exact_dependency_1 (addr, XEXP (insn, i)))
26105 for (j = 0; j < XVECLEN (insn, i); j++)
26106 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
/* NOTE(review): numbered extract with elided lines; comments added only.
   True when STORE's MEM destination reappears inside LOAD's source,
   i.e. the load reads exactly the stored address (store-forwarding
   hazard detection for the adjust-cost hook).  */
26114 /* Return true if there exists exact dependency for store & load, i.e.
26115 the same memory address is used in them. */
26117 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
26121 set1 = single_set (store);
26124 if (!MEM_P (SET_DEST (set1)))
26126 set2 = single_set (load);
26129 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
/* NOTE(review): numbered extract with elided lines; comments added only.
   TARGET_SCHED_ADJUST_COST hook: tweaks the dependence COST between
   DEP_INSN and INSN per microarchitecture — AGI stalls, flag pairing,
   hidden load latency, push/pop stack-engine parallelism, and
   store-forwarding penalties.  */
26135 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26137 enum attr_type insn_type, dep_insn_type;
26138 enum attr_memory memory;
26140 int dep_insn_code_number;
26142 /* Anti and output dependencies have zero cost on all CPUs. */
26143 if (REG_NOTE_KIND (link) != 0)
26146 dep_insn_code_number = recog_memoized (dep_insn);
26148 /* If we can't recognize the insns, we can't really do anything. */
26149 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26152 insn_type = get_attr_type (insn);
26153 dep_insn_type = get_attr_type (dep_insn);
26157 case PROCESSOR_PENTIUM:
26158 /* Address Generation Interlock adds a cycle of latency. */
26159 if (insn_type == TYPE_LEA)
26161 rtx addr = PATTERN (insn);
26163 if (GET_CODE (addr) == PARALLEL)
26164 addr = XVECEXP (addr, 0, 0);
26166 gcc_assert (GET_CODE (addr) == SET);
26168 addr = SET_SRC (addr);
26169 if (modified_in_p (addr, dep_insn))
26172 else if (ix86_agi_dependent (dep_insn, insn))
26175 /* ??? Compares pair with jump/setcc. */
26176 if (ix86_flags_dependent (insn, dep_insn, insn_type))
26179 /* Floating point stores require value to be ready one cycle earlier. */
26180 if (insn_type == TYPE_FMOV
26181 && get_attr_memory (insn) == MEMORY_STORE
26182 && !ix86_agi_dependent (dep_insn, insn))
26186 case PROCESSOR_PENTIUMPRO:
26187 /* INT->FP conversion is expensive. */
26188 if (get_attr_fp_int_src (dep_insn))
26191 /* There is one cycle extra latency between an FP op and a store. */
26192 if (insn_type == TYPE_FMOV
26193 && (set = single_set (dep_insn)) != NULL_RTX
26194 && (set2 = single_set (insn)) != NULL_RTX
26195 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26196 && MEM_P (SET_DEST (set2)))
26199 memory = get_attr_memory (insn);
26201 /* Show ability of reorder buffer to hide latency of load by executing
26202 in parallel with previous instruction in case
26203 previous instruction is not needed to compute the address. */
26204 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26205 && !ix86_agi_dependent (dep_insn, insn))
26207 /* Claim moves to take one cycle, as core can issue one load
26208 at time and the next load can start cycle later. */
26209 if (dep_insn_type == TYPE_IMOV
26210 || dep_insn_type == TYPE_FMOV)
26218 /* The esp dependency is resolved before
26219 the instruction is really finished. */
26220 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26221 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26224 /* INT->FP conversion is expensive. */
26225 if (get_attr_fp_int_src (dep_insn))
26228 memory = get_attr_memory (insn);
26230 /* Show ability of reorder buffer to hide latency of load by executing
26231 in parallel with previous instruction in case
26232 previous instruction is not needed to compute the address. */
26233 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26234 && !ix86_agi_dependent (dep_insn, insn))
26236 /* Claim moves to take one cycle, as core can issue one load
26237 at time and the next load can start cycle later. */
26238 if (dep_insn_type == TYPE_IMOV
26239 || dep_insn_type == TYPE_FMOV)
26248 case PROCESSOR_AMDFAM10:
26249 case PROCESSOR_BDVER1:
26250 case PROCESSOR_BDVER2:
26251 case PROCESSOR_BDVER3:
26252 case PROCESSOR_BDVER4:
26253 case PROCESSOR_BTVER1:
26254 case PROCESSOR_BTVER2:
26255 case PROCESSOR_GENERIC:
26256 /* Stack engine allows to execute push&pop instructions in parall. */
26257 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26258 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26262 case PROCESSOR_ATHLON:
26264 memory = get_attr_memory (insn);
26266 /* Show ability of reorder buffer to hide latency of load by executing
26267 in parallel with previous instruction in case
26268 previous instruction is not needed to compute the address. */
26269 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26270 && !ix86_agi_dependent (dep_insn, insn))
26272 enum attr_unit unit = get_attr_unit (insn);
26275 /* Because of the difference between the length of integer and
26276 floating unit pipeline preparation stages, the memory operands
26277 for floating point are cheaper.
26279 ??? For Athlon it the difference is most probably 2. */
26280 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26283 loadcost = TARGET_ATHLON ? 2 : 0;
26285 if (cost >= loadcost)
26292 case PROCESSOR_CORE2:
26293 case PROCESSOR_NEHALEM:
26294 case PROCESSOR_SANDYBRIDGE:
26295 case PROCESSOR_HASWELL:
26296 /* Stack engine allows to execute push&pop instructions in parall. */
26297 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26298 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26301 memory = get_attr_memory (insn);
26303 /* Show ability of reorder buffer to hide latency of load by executing
26304 in parallel with previous instruction in case
26305 previous instruction is not needed to compute the address. */
26306 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26307 && !ix86_agi_dependent (dep_insn, insn))
26316 case PROCESSOR_SILVERMONT:
26317 case PROCESSOR_KNL:
26318 case PROCESSOR_INTEL:
26319 if (!reload_completed)
26322 /* Increase cost of integer loads. */
26323 memory = get_attr_memory (dep_insn);
26324 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26326 enum attr_unit unit = get_attr_unit (dep_insn);
26327 if (unit == UNIT_INTEGER && cost == 1)
26329 if (memory == MEMORY_LOAD)
26333 /* Increase cost of ld/st for short int types only
26334 because of store forwarding issue. */
26335 rtx set = single_set (dep_insn);
26336 if (set && (GET_MODE (SET_DEST (set)) == QImode
26337 || GET_MODE (SET_DEST (set)) == HImode))
26339 /* Increase cost of store/load insn if exact
26340 dependence exists and it is load insn. */
26341 enum attr_memory insn_memory = get_attr_memory (insn);
26342 if (insn_memory == MEMORY_LOAD
26343 && exact_store_load_dependency (dep_insn, insn))
/* NOTE(review): numbered extract with elided lines (per-case return
   values not all visible); comments added only.  Scheduler hook: width
   of haifa-sched's multipass lookahead per microarchitecture.  */
26357 /* How many alternative schedules to try. This should be as wide as the
26358 scheduling freedom in the DFA, but no wider. Making this value too
26359 large results extra work for the scheduler. */
26362 ia32_multipass_dfa_lookahead (void)
26366 case PROCESSOR_PENTIUM:
26369 case PROCESSOR_PENTIUMPRO:
26373 case PROCESSOR_BDVER1:
26374 case PROCESSOR_BDVER2:
26375 case PROCESSOR_BDVER3:
26376 case PROCESSOR_BDVER4:
26377 /* We use lookahead value 4 for BD both before and after reload
26378 schedules. Plan is to have value 8 included for O3. */
26381 case PROCESSOR_CORE2:
26382 case PROCESSOR_NEHALEM:
26383 case PROCESSOR_SANDYBRIDGE:
26384 case PROCESSOR_HASWELL:
26385 case PROCESSOR_BONNELL:
26386 case PROCESSOR_SILVERMONT:
26387 case PROCESSOR_KNL:
26388 case PROCESSOR_INTEL:
26389 /* Generally, we want haifa-sched:max_issue() to look ahead as far
26390 as many instructions can be executed on a cycle, i.e.,
26391 issue_rate. I wonder why tuning for many CPUs does not do this. */
26392 if (reload_completed)
26393 return ix86_issue_rate ();
26394 /* Don't use lookahead for pre-reload schedule to save compile time. */
/* NOTE(review): numbered extract; comments added only.
   Scheduler hook: macro-fusion is available iff the active tuning sets
   TARGET_FUSE_CMP_AND_BRANCH.  */
26402 /* Return true if target platform supports macro-fusion. */
26405 ix86_macro_fusion_p ()
26407 return TARGET_FUSE_CMP_AND_BRANCH;
/* NOTE(review): numbered extract with elided lines; comments added only.
   Decides whether CONDGEN (test/cmp/inc/dec/alu) and CONDJMP can
   macro-fuse on the current microarchitecture, rejecting the encodings
   the hardware cannot fuse: MEM+IMM compares, RIP-relative operands,
   SF/OF-reading jumps when unsupported, alu with MEM destination, and
   inc/dec with unsigned jumps.  */
26410 /* Check whether current microarchitecture support macro fusion
26411 for insn pair "CONDGEN + CONDJMP". Refer to
26412 "Intel Architectures Optimization Reference Manual". */
26415 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26418 enum rtx_code ccode;
26419 rtx compare_set = NULL_RTX, test_if, cond;
26420 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26422 if (!any_condjump_p (condjmp))
26425 if (get_attr_type (condgen) != TYPE_TEST
26426 && get_attr_type (condgen) != TYPE_ICMP
26427 && get_attr_type (condgen) != TYPE_INCDEC
26428 && get_attr_type (condgen) != TYPE_ALU)
26431 compare_set = single_set (condgen);
26432 if (compare_set == NULL_RTX
26433 && !TARGET_FUSE_ALU_AND_BRANCH)
/* PARALLEL producer: pick out the COMPARE set and the alu result set.  */
26436 if (compare_set == NULL_RTX)
26439 rtx pat = PATTERN (condgen);
26440 for (i = 0; i < XVECLEN (pat, 0); i++)
26441 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
26443 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26444 if (GET_CODE (set_src) == COMPARE)
26445 compare_set = XVECEXP (pat, 0, i);
26447 alu_set = XVECEXP (pat, 0, i);
26450 if (compare_set == NULL_RTX)
26452 src = SET_SRC (compare_set);
26453 if (GET_CODE (src) != COMPARE)
26456 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26458 if ((MEM_P (XEXP (src, 0))
26459 && CONST_INT_P (XEXP (src, 1)))
26460 || (MEM_P (XEXP (src, 1))
26461 && CONST_INT_P (XEXP (src, 0))))
26464 /* No fusion for RIP-relative address. */
26465 if (MEM_P (XEXP (src, 0)))
26466 addr = XEXP (XEXP (src, 0), 0);
26467 else if (MEM_P (XEXP (src, 1)))
26468 addr = XEXP (XEXP (src, 1), 0);
26471 ix86_address parts;
26472 int ok = ix86_decompose_address (addr, &parts);
26475 if (rip_relative_addr_p (&parts))
26479 test_if = SET_SRC (pc_set (condjmp));
26480 cond = XEXP (test_if, 0);
26481 ccode = GET_CODE (cond);
26482 /* Check whether conditional jump use Sign or Overflow Flags. */
26483 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26490 /* Return true for TYPE_TEST and TYPE_ICMP. */
26491 if (get_attr_type (condgen) == TYPE_TEST
26492 || get_attr_type (condgen) == TYPE_ICMP
26495 /* The following is the case that macro-fusion for alu + jmp. */
26496 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26499 /* No fusion for alu op with memory destination operand. */
26500 dest = SET_DEST (alu_set);
26504 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26506 if (get_attr_type (condgen) == TYPE_INCDEC
/* NOTE(review): numbered extract with elided lines; comments added only.
   Bonnell (Atom) ready-list heuristic: when an SImode IMUL is on top of
   the ready list, find the sole producer feeding another independent
   IMUL so it can be scheduled alongside.  Returns the producer's index
   or -1.  */
26516 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
26517 execution. It is applied if
26518 (1) IMUL instruction is on the top of list;
26519 (2) There exists the only producer of independent IMUL instruction in
26521 Return index of IMUL producer if it was found and -1 otherwise. */
26523 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26526 rtx set, insn1, insn2;
26527 sd_iterator_def sd_it;
26532 if (!TARGET_BONNELL)
26535 /* Check that IMUL instruction is on the top of ready list. */
26536 insn = ready[n_ready - 1];
26537 set = single_set (insn);
26540 if (!(GET_CODE (SET_SRC (set)) == MULT
26541 && GET_MODE (SET_SRC (set)) == SImode))
26544 /* Search for producer of independent IMUL instruction. */
26545 for (i = n_ready - 2; i >= 0; i--)
26548 if (!NONDEBUG_INSN_P (insn))
26550 /* Skip IMUL instruction. */
26551 insn2 = PATTERN (insn);
26552 if (GET_CODE (insn2) == PARALLEL)
26553 insn2 = XVECEXP (insn2, 0, 0);
26554 if (GET_CODE (insn2) == SET
26555 && GET_CODE (SET_SRC (insn2)) == MULT
26556 && GET_MODE (SET_SRC (insn2)) == SImode)
/* Walk forward deps of the candidate to find a consumer IMUL.  */
26559 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26562 con = DEP_CON (dep);
26563 if (!NONDEBUG_INSN_P (con))
26565 insn1 = PATTERN (con);
26566 if (GET_CODE (insn1) == PARALLEL)
26567 insn1 = XVECEXP (insn1, 0, 0);
26569 if (GET_CODE (insn1) == SET
26570 && GET_CODE (SET_SRC (insn1)) == MULT
26571 && GET_MODE (SET_SRC (insn1)) == SImode)
26573 sd_iterator_def sd_it1;
26575 /* Check if there is no other dependee for IMUL. */
26577 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26580 pro = DEP_PRO (dep1);
26581 if (!NONDEBUG_INSN_P (pro))
/* NOTE(review): numbered extract with elided lines; comments added only.
   Silvermont/Intel heuristic: for two equal-priority non-jump insns at
   the top of the ready list, compare the latest ready-tick of their
   resolved producers; on a tie, prefer the load.  Returns true when the
   top two entries should be swapped.  */
26596 /* Try to find the best candidate on the top of ready list if two insns
26597 have the same priority - candidate is best if its dependees were
26598 scheduled earlier. Applied for Silvermont only.
26599 Return true if top 2 insns must be interchanged. */
26601 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26603 rtx_insn *top = ready[n_ready - 1];
26604 rtx_insn *next = ready[n_ready - 2];
26606 sd_iterator_def sd_it;
26610 #define INSN_TICK(INSN) (HID (INSN)->tick)
26612 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26615 if (!NONDEBUG_INSN_P (top))
26617 if (!NONJUMP_INSN_P (top))
26619 if (!NONDEBUG_INSN_P (next))
26621 if (!NONJUMP_INSN_P (next))
26623 set = single_set (top);
26626 set = single_set (next);
26630 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26632 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26634 /* Determine winner more precise. */
26635 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26638 pro = DEP_PRO (dep);
26639 if (!NONDEBUG_INSN_P (pro))
26641 if (INSN_TICK (pro) > clock1)
26642 clock1 = INSN_TICK (pro);
26644 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26647 pro = DEP_PRO (dep);
26648 if (!NONDEBUG_INSN_P (pro))
26650 if (INSN_TICK (pro) > clock2)
26651 clock2 = INSN_TICK (pro);
26654 if (clock1 == clock2)
26656 /* Determine winner - load must win. */
26657 enum attr_memory memory1, memory2;
26658 memory1 = get_attr_memory (top);
26659 memory2 = get_attr_memory (next);
26660 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26663 return (bool) (clock2 < clock1);
26669 /* Perform possible reordering of ready list for Atom/Silvermont only.
26670 Return issue rate. */
/* NOTE(review): non-contiguous excerpt — some statements are elided.  */
26672 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26673 int *pn_ready, int clock_var)
26675 int issue_rate = -1;
26676 int n_ready = *pn_ready;
26681 /* Set up issue rate. */
26682 issue_rate = ix86_issue_rate ();
26684 /* Do reordering for BONNELL/SILVERMONT only. */
26685 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26688 /* Nothing to do if ready list contains only 1 instruction. */
26692 /* Do reordering for post-reload scheduler only. */
26693 if (!reload_completed)
/* Bonnell: move the producer of an independent IMUL to the top so the
   pipelined multiplier can be kept busy.  */
26696 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26698 if (sched_verbose > 1)
26699 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26700 INSN_UID (ready[index]));
26702 /* Put IMUL producer (ready[index]) at the top of ready list. */
26703 insn = ready[index];
26704 for (i = index; i < n_ready - 1; i++)
26705 ready[i] = ready[i + 1];
26706 ready[n_ready - 1] = insn;
/* Silvermont: on non-zero cycles, break priority ties in favor of the
   insn whose dependees were scheduled earlier (loads preferred).  */
26709 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26711 if (sched_verbose > 1)
26712 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26713 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26714 /* Swap 2 top elements of ready list. */
26715 insn = ready[n_ready - 1];
26716 ready[n_ready - 1] = ready[n_ready - 2];
26717 ready[n_ready - 2] = insn;
/* Forward declaration of the likely-spilled predicate used below.  */
26723 ix86_class_likely_spilled_p (reg_class_t);
26725 /* Returns true if lhs of insn is HW function argument register and set up
26726 is_spilled to true if it is likely spilled HW register. */
/* NOTE(review): non-contiguous excerpt — some statements are elided.  */
26728 insn_is_function_arg (rtx insn, bool* is_spilled)
26732 if (!NONDEBUG_INSN_P (insn))
26734 /* Call instructions are not movable, ignore it. */
/* Strip a PARALLEL wrapper to reach the primary SET, if any.  */
26737 insn = PATTERN (insn);
26738 if (GET_CODE (insn) == PARALLEL)
26739 insn = XVECEXP (insn, 0, 0);
26740 if (GET_CODE (insn) != SET)
26742 dst = SET_DEST (insn);
26743 if (REG_P (dst) && HARD_REGISTER_P (dst)
26744 && ix86_function_arg_regno_p (REGNO (dst)))
26746 /* Is it likely spilled HW register? */
26747 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26748 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26749 *is_spilled = true;
26755 /* Add output dependencies for chain of function adjacent arguments if only
26756 there is a move to likely spilled HW register. Return first argument
26757 if at least one dependence was added or NULL otherwise. */
/* NOTE(review): non-contiguous excerpt — some statements are elided.  */
26759 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26762 rtx_insn *last = call;
26763 rtx_insn *first_arg = NULL;
26764 bool is_spilled = false;
/* Step just before HEAD so the walk below can stop at it.  */
26766 head = PREV_INSN (head);
26768 /* Find nearest to call argument passing instruction. */
26771 last = PREV_INSN (last);
26774 if (!NONDEBUG_INSN_P (last))
26776 if (insn_is_function_arg (last, &is_spilled))
/* Walk backwards chaining together adjacent argument-setting insns.  */
26784 insn = PREV_INSN (last);
26785 if (!INSN_P (insn))
26789 if (!NONDEBUG_INSN_P (insn))
26794 if (insn_is_function_arg (insn, &is_spilled))
26796 /* Add output dependence between two function arguments if chain
26797 of output arguments contains likely spilled HW registers. */
26799 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26800 first_arg = last = insn;
26810 /* Add output or anti dependency from insn to first_arg to restrict its code
/* NOTE(review): non-contiguous excerpt — some statements are elided.
   Chooses between an output and an anti dependency depending on what
   INSN's single set writes (condition lines are elided here).  */
26813 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26818 set = single_set (insn);
26821 tmp = SET_DEST (set);
26824 /* Add output dependency to the first function argument. */
26825 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26828 /* Add anti dependency. */
26829 add_dependence (first_arg, insn, REG_DEP_ANTI);
26832 /* Avoid cross block motion of function argument through adding dependency
26833 from the first non-jump instruction in bb. */
/* NOTE(review): non-contiguous excerpt — walks BB backwards from its end
   to its head; loop structure lines are elided.  */
26835 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26837 rtx_insn *insn = BB_END (bb);
26841 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26843 rtx set = single_set (insn);
26846 avoid_func_arg_motion (arg, insn);
26850 if (insn == BB_HEAD (bb))
26852 insn = PREV_INSN (insn);
26856 /* Hook for pre-reload schedule - avoid motion of function arguments
26857 passed in likely spilled HW registers. */
/* NOTE(review): non-contiguous excerpt — some statements are elided.  */
26859 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26862 rtx_insn *first_arg = NULL;
/* This hook is for the pre-reload scheduler only.  */
26863 if (reload_completed)
26865 while (head != tail && DEBUG_INSN_P (head))
26866 head = NEXT_INSN (head);
26867 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26868 if (INSN_P (insn) && CALL_P (insn))
26870 first_arg = add_parameter_dependencies (insn, head);
26873 /* Add dependee for first argument to predecessors if only
26874 region contains more than one block. */
26875 basic_block bb = BLOCK_FOR_INSN (insn);
26876 int rgn = CONTAINING_RGN (bb->index);
26877 int nr_blks = RGN_NR_BLOCKS (rgn);
26878 /* Skip trivial regions and region head blocks that can have
26879 predecessors outside of region. */
26880 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26885 /* Regions are SCCs with the exception of selective
26886 scheduling with pipelining of outer blocks enabled.
26887 So also check that immediate predecessors of a non-head
26888 block are in the same region. */
26889 FOR_EACH_EDGE (e, ei, bb->preds)
26891 /* Avoid creating of loop-carried dependencies through
26892 using topological ordering in the region. */
26893 if (rgn == CONTAINING_RGN (e->src->index)
26894 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26895 add_dependee_for_func_arg (first_arg, e->src);
26903 else if (first_arg)
26904 avoid_func_arg_motion (first_arg, insn);
26907 /* Hook for pre-reload schedule - set priority of moves from likely spilled
26908 HW registers to maximum, to schedule them as soon as possible. These are
26909 moves from function argument registers at the top of the function entry
26910 and moves from function return value registers after call. */
/* NOTE(review): non-contiguous excerpt — some statements are elided.  */
26912 ix86_adjust_priority (rtx_insn *insn, int priority)
/* Pre-reload scheduler only.  */
26916 if (reload_completed)
26919 if (!NONDEBUG_INSN_P (insn))
26922 set = single_set (insn);
26925 rtx tmp = SET_SRC (set);
/* Boost moves whose source is a non-fixed, likely-spilled hard register.  */
26927 && HARD_REGISTER_P (tmp)
26928 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26929 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26930 return current_sched_info->sched_max_insns_priority;
26936 /* Model decoder of Core 2/i7.
26937 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
26938 track the instruction fetch block boundaries and make sure that long
26939 (9+ bytes) instructions are assigned to D0. */
26941 /* Maximum length of an insn that can be handled by
26942 a secondary decoder unit. '8' for Core 2/i7. */
26943 static int core2i7_secondary_decoder_max_insn_size;
26945 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26946 '16' for Core 2/i7. */
26947 static int core2i7_ifetch_block_size;
26949 /* Maximum number of instructions decoder can handle per cycle.
26950 '6' for Core 2/i7. */
26951 static int core2i7_ifetch_block_max_insns;
/* Convenience typedefs for (const) pointers to the per-round multipass
   scheduling state.  */
26953 typedef struct ix86_first_cycle_multipass_data_ *
26954 ix86_first_cycle_multipass_data_t;
26955 typedef const struct ix86_first_cycle_multipass_data_ *
26956 const_ix86_first_cycle_multipass_data_t;
26958 /* A variable to store target state across calls to max_issue within
26960 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26961 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26963 /* Initialize DATA: reset ifetch-block tracking and drop any
   ready_try-change bitmap from a previous round. */
26965 core2i7_first_cycle_multipass_init (void *_data)
26967 ix86_first_cycle_multipass_data_t data
26968 = (ix86_first_cycle_multipass_data_t) _data;
26970 data->ifetch_block_len = 0;
26971 data->ifetch_block_n_insns = 0;
26972 data->ready_try_change = NULL;
26973 data->ready_try_change_size = 0;
26976 /* Advancing the cycle; reset ifetch block counts. */
26978 core2i7_dfa_post_advance_cycle (void)
26980 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
/* Sanity: we must never have issued more insns than the decoder allows.  */
26982 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26984 data->ifetch_block_len = 0;
26985 data->ifetch_block_n_insns = 0;
26988 static int min_insn_size (rtx_insn *);
26990 /* Filter out insns from ready_try that the core will not be able to issue
26991 on current cycle due to decoder. */
/* NOTE(review): non-contiguous excerpt — the loop header over the ready
   list is elided here (original line numbers skip).  */
26993 core2i7_first_cycle_multipass_filter_ready_try
26994 (const_ix86_first_cycle_multipass_data_t data,
26995 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
/* Skip entries already masked out.  */
27002 if (ready_try[n_ready])
27005 insn = get_ready_element (n_ready);
27006 insn_size = min_insn_size (insn);
27008 if (/* If this is a too long an insn for a secondary decoder ... */
27009 (!first_cycle_insn_p
27010 && insn_size > core2i7_secondary_decoder_max_insn_size)
27011 /* ... or it would not fit into the ifetch block ... */
27012 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
27013 /* ... or the decoder is full already ... */
27014 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
27015 /* ... mask the insn out. */
27017 ready_try[n_ready] = 1;
/* Record the change so backtracking can undo it.  */
27019 if (data->ready_try_change)
27020 bitmap_set_bit (data->ready_try_change, n_ready);
27025 /* Prepare for a new round of multipass lookahead scheduling. */
27027 core2i7_first_cycle_multipass_begin (void *_data,
27028 signed char *ready_try, int n_ready,
27029 bool first_cycle_insn_p)
27031 ix86_first_cycle_multipass_data_t data
27032 = (ix86_first_cycle_multipass_data_t) _data;
27033 const_ix86_first_cycle_multipass_data_t prev_data
27034 = ix86_first_cycle_multipass_data;
27036 /* Restore the state from the end of the previous round. */
27037 data->ifetch_block_len = prev_data->ifetch_block_len;
27038 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
27040 /* Filter instructions that cannot be issued on current cycle due to
27041 decoder restrictions. */
27042 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27043 first_cycle_insn_p);
27046 /* INSN is being issued in current solution. Account for its impact on
27047 the decoder model. */
/* NOTE(review): non-contiguous excerpt — a few lines (e.g. the resize
   argument) are elided.  */
27049 core2i7_first_cycle_multipass_issue (void *_data,
27050 signed char *ready_try, int n_ready,
27051 rtx_insn *insn, const void *_prev_data)
27053 ix86_first_cycle_multipass_data_t data
27054 = (ix86_first_cycle_multipass_data_t) _data;
27055 const_ix86_first_cycle_multipass_data_t prev_data
27056 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
27058 int insn_size = min_insn_size (insn);
/* Extend the current ifetch block by this insn.  */
27060 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
27061 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
27062 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
27063 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27065 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
27066 if (!data->ready_try_change)
27068 data->ready_try_change = sbitmap_alloc (n_ready);
27069 data->ready_try_change_size = n_ready;
27071 else if (data->ready_try_change_size < n_ready)
27073 data->ready_try_change = sbitmap_resize (data->ready_try_change,
27075 data->ready_try_change_size = n_ready;
27077 bitmap_clear (data->ready_try_change);
27079 /* Filter out insns from ready_try that the core will not be able to issue
27080 on current cycle due to decoder. */
27081 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27085 /* Revert the effect on ready_try. */
/* NOTE(review): non-contiguous excerpt — the loop body that clears
   ready_try entries is elided.  */
27087 core2i7_first_cycle_multipass_backtrack (const void *_data,
27088 signed char *ready_try,
27089 int n_ready ATTRIBUTE_UNUSED)
27091 const_ix86_first_cycle_multipass_data_t data
27092 = (const_ix86_first_cycle_multipass_data_t) _data;
27093 unsigned int i = 0;
27094 sbitmap_iterator sbi;
27096 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
27097 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
27103 /* Save the result of multipass lookahead scheduling for the next round. */
27105 core2i7_first_cycle_multipass_end (const void *_data)
27107 const_ix86_first_cycle_multipass_data_t data
27108 = (const_ix86_first_cycle_multipass_data_t) _data;
27109 ix86_first_cycle_multipass_data_t next_data
27110 = ix86_first_cycle_multipass_data;
/* Carry the ifetch-block state into the global slot read by _begin.  */
27114 next_data->ifetch_block_len = data->ifetch_block_len;
27115 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
27119 /* Deallocate target data: free the ready_try-change bitmap if it was
   ever allocated. */
27121 core2i7_first_cycle_multipass_fini (void *_data)
27123 ix86_first_cycle_multipass_data_t data
27124 = (ix86_first_cycle_multipass_data_t) _data;
27126 if (data->ready_try_change)
27128 sbitmap_free (data->ready_try_change);
27129 data->ready_try_change = NULL;
27130 data->ready_try_change_size = 0;
27134 /* Prepare for scheduling pass. */
/* NOTE(review): non-contiguous excerpt — the switch head and default
   label lines are elided (original line numbers skip).  */
27136 ix86_sched_init_global (FILE *, int, int)
27138 /* Install scheduling hooks for current CPU. Some of these hooks are used
27139 in time-critical parts of the scheduler, so we only set them up when
27140 they are actually used. */
27143 case PROCESSOR_CORE2:
27144 case PROCESSOR_NEHALEM:
27145 case PROCESSOR_SANDYBRIDGE:
27146 case PROCESSOR_HASWELL:
27147 /* Do not perform multipass scheduling for pre-reload schedule
27148 to save compile time. */
27149 if (reload_completed)
27151 targetm.sched.dfa_post_advance_cycle
27152 = core2i7_dfa_post_advance_cycle;
27153 targetm.sched.first_cycle_multipass_init
27154 = core2i7_first_cycle_multipass_init;
27155 targetm.sched.first_cycle_multipass_begin
27156 = core2i7_first_cycle_multipass_begin;
27157 targetm.sched.first_cycle_multipass_issue
27158 = core2i7_first_cycle_multipass_issue;
27159 targetm.sched.first_cycle_multipass_backtrack
27160 = core2i7_first_cycle_multipass_backtrack;
27161 targetm.sched.first_cycle_multipass_end
27162 = core2i7_first_cycle_multipass_end;
27163 targetm.sched.first_cycle_multipass_fini
27164 = core2i7_first_cycle_multipass_fini;
27166 /* Set decoder parameters. */
27167 core2i7_secondary_decoder_max_insn_size = 8;
27168 core2i7_ifetch_block_size = 16;
27169 core2i7_ifetch_block_max_insns = 6;
27172 /* ... Fall through ... */
/* Non-Core CPUs (and pre-reload Core): clear all multipass hooks.  */
27174 targetm.sched.dfa_post_advance_cycle = NULL;
27175 targetm.sched.first_cycle_multipass_init = NULL;
27176 targetm.sched.first_cycle_multipass_begin = NULL;
27177 targetm.sched.first_cycle_multipass_issue = NULL;
27178 targetm.sched.first_cycle_multipass_backtrack = NULL;
27179 targetm.sched.first_cycle_multipass_end = NULL;
27180 targetm.sched.first_cycle_multipass_fini = NULL;
27186 /* Compute the alignment given to a constant that is being placed in memory.
27187 EXP is the constant and ALIGN is the alignment that the object would
27189 The value of this function is used instead of that alignment to align
/* NOTE(review): non-contiguous excerpt — the returned alignment values
   for the first two branches are elided.  */
27193 ix86_constant_alignment (tree exp, int align)
27195 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27196 || TREE_CODE (exp) == INTEGER_CST)
27198 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27200 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
/* Long strings get word alignment (when not optimizing for size) to
   speed up block operations on them.  */
27203 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27204 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27205 return BITS_PER_WORD;
27210 /* Compute the alignment for a static variable.
27211 TYPE is the data type, and ALIGN is the alignment that
27212 the object would ordinarily have. The value of this function is used
27213 instead of that alignment to align the object. */
/* NOTE(review): non-contiguous excerpt — several return statements are
   elided (original line numbers skip).  */
27216 ix86_data_alignment (tree type, int align, bool opt)
27218 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27219 for symbols from other compilation units or symbols that don't need
27220 to bind locally. In order to preserve some ABI compatibility with
27221 those compilers, ensure we don't decrease alignment from what we
27224 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
27226 /* A data structure, equal or greater than the size of a cache line
27227 (64 bytes in the Pentium 4 and other recent Intel processors, including
27228 processors based on Intel Core microarchitecture) should be aligned
27229 so that its base address is a multiple of a cache line size. */
27232 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
27234 if (max_align < BITS_PER_WORD)
27235 max_align = BITS_PER_WORD;
/* -malign-data= lets the user pick between ABI, compat and
   cache-line alignment policies.  */
27237 switch (ix86_align_data_type)
27239 case ix86_align_data_type_abi: opt = false; break;
27240 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
27241 case ix86_align_data_type_cacheline: break;
27245 && AGGREGATE_TYPE_P (type)
27246 && TYPE_SIZE (type)
27247 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27249 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27250 && align < max_align_compat)
27251 align = max_align_compat;
27252 if (wi::geu_p (TYPE_SIZE (type), max_align)
27253 && align < max_align)
27257 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
27258 to 16byte boundary. */
27261 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27262 && TYPE_SIZE (type)
27263 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27264 && wi::geu_p (TYPE_SIZE (type), 128)
/* Mode-based refinements: bump DFmode data to 64 bits and 128-bit
   modes to 128 bits, per element/field/scalar kind of TYPE.  */
27272 if (TREE_CODE (type) == ARRAY_TYPE)
27274 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27276 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27279 else if (TREE_CODE (type) == COMPLEX_TYPE)
27282 if (TYPE_MODE (type) == DCmode && align < 64)
27284 if ((TYPE_MODE (type) == XCmode
27285 || TYPE_MODE (type) == TCmode) && align < 128)
27288 else if ((TREE_CODE (type) == RECORD_TYPE
27289 || TREE_CODE (type) == UNION_TYPE
27290 || TREE_CODE (type) == QUAL_UNION_TYPE)
27291 && TYPE_FIELDS (type))
27293 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27295 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27298 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27299 || TREE_CODE (type) == INTEGER_TYPE)
27301 if (TYPE_MODE (type) == DFmode && align < 64)
27303 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27310 /* Compute the alignment for a local variable or a stack slot. EXP is
27311 the data type or decl itself, MODE is the widest mode available and
27312 ALIGN is the alignment that the object would ordinarily have. The
27313 value of this macro is used instead of that alignment to align the
/* NOTE(review): non-contiguous excerpt — several return statements are
   elided (original line numbers skip).  */
27317 ix86_local_alignment (tree exp, machine_mode mode,
27318 unsigned int align)
27322 if (exp && DECL_P (exp))
27324 type = TREE_TYPE (exp);
27333 /* Don't do dynamic stack realignment for long long objects with
27334 -mpreferred-stack-boundary=2. */
27337 && ix86_preferred_stack_boundary < 64
27338 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27339 && (!type || !TYPE_USER_ALIGN (type))
27340 && (!decl || !DECL_USER_ALIGN (decl)))
27343 /* If TYPE is NULL, we are allocating a stack slot for caller-save
27344 register in MODE. We will return the largest alignment of XF
27348 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27349 align = GET_MODE_ALIGNMENT (DFmode);
27353 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
27354 to 16byte boundary. Exact wording is:
27356 An array uses the same alignment as its elements, except that a local or
27357 global array variable of length at least 16 bytes or
27358 a C99 variable-length array variable always has alignment of at least 16 bytes.
27360 This was added to allow use of aligned SSE instructions at arrays. This
27361 rule is meant for static storage (where compiler can not do the analysis
27362 by itself). We follow it for automatic variables only when convenient.
27363 We fully control everything in the function compiled and functions from
27364 other unit can not rely on the alignment.
27366 Exclude va_list type. It is the common case of local array where
27367 we can not benefit from the alignment.
27369 TODO: Probably one should optimize for size only when var is not escaping. */
27370 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27373 if (AGGREGATE_TYPE_P (type)
27374 && (va_list_type_node == NULL_TREE
27375 || (TYPE_MAIN_VARIANT (type)
27376 != TYPE_MAIN_VARIANT (va_list_type_node)))
27377 && TYPE_SIZE (type)
27378 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27379 && wi::geu_p (TYPE_SIZE (type), 16)
/* Mode-based refinements, parallel to ix86_data_alignment: DFmode data
   gets 64-bit alignment, 128-bit modes get 128.  */
27383 if (TREE_CODE (type) == ARRAY_TYPE)
27385 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27387 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27390 else if (TREE_CODE (type) == COMPLEX_TYPE)
27392 if (TYPE_MODE (type) == DCmode && align < 64)
27394 if ((TYPE_MODE (type) == XCmode
27395 || TYPE_MODE (type) == TCmode) && align < 128)
27398 else if ((TREE_CODE (type) == RECORD_TYPE
27399 || TREE_CODE (type) == UNION_TYPE
27400 || TREE_CODE (type) == QUAL_UNION_TYPE)
27401 && TYPE_FIELDS (type))
27403 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27405 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27408 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27409 || TREE_CODE (type) == INTEGER_TYPE)
27412 if (TYPE_MODE (type) == DFmode && align < 64)
27414 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27420 /* Compute the minimum required alignment for dynamic stack realignment
27421 purposes for a local variable, parameter or a stack slot. EXP is
27422 the data type or decl itself, MODE is its mode and ALIGN is the
27423 alignment that the object would ordinarily have. */
/* NOTE(review): non-contiguous excerpt — return statements are elided.  */
27426 ix86_minimum_alignment (tree exp, machine_mode mode,
27427 unsigned int align)
27431 if (exp && DECL_P (exp))
27433 type = TREE_TYPE (exp);
/* Only the 32-bit, 64-bit-align, low-stack-boundary case needs work.  */
27442 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27445 /* Don't do dynamic stack realignment for long long objects with
27446 -mpreferred-stack-boundary=2. */
27447 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27448 && (!type || !TYPE_USER_ALIGN (type))
27449 && (!decl || !DECL_USER_ALIGN (decl)))
27455 /* Find a location for the static chain incoming to a nested function.
27456 This is a register, unless all free registers are used by arguments. */
/* NOTE(review): non-contiguous excerpt — some regno assignments and
   returns are elided (original line numbers skip).  */
27459 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
27463 /* While this function won't be called by the middle-end when a static
27464 chain isn't needed, it's also used throughout the backend so it's
27465 easiest to keep this check centralized. */
27466 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
27471 /* We always use R10 in 64-bit mode. */
27476 const_tree fntype, fndecl;
27479 /* By default in 32-bit mode we use ECX to pass the static chain. */
27482 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
27484 fntype = TREE_TYPE (fndecl_or_type);
27485 fndecl = fndecl_or_type;
27489 fntype = fndecl_or_type;
/* The calling convention determines which registers remain free.  */
27493 ccvt = ix86_get_callcvt (fntype);
27494 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27496 /* Fastcall functions use ecx/edx for arguments, which leaves
27497 us with EAX for the static chain.
27498 Thiscall functions use ecx for arguments, which also
27499 leaves us with EAX for the static chain. */
27502 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27504 /* Thiscall functions use ecx for arguments, which leaves
27505 us with EAX and EDX for the static chain.
27506 We are using for abi-compatibility EAX. */
27509 else if (ix86_function_regparm (fntype, fndecl) == 3)
27511 /* For regparm 3, we have no free call-clobbered registers in
27512 which to store the static chain. In order to implement this,
27513 we have the trampoline push the static chain to the stack.
27514 However, we can't push a value below the return address when
27515 we call the nested function directly, so we have to use an
27516 alternate entry point. For this we use ESI, and have the
27517 alternate entry point push ESI, so that things appear the
27518 same once we're executing the nested function. */
27521 if (fndecl == current_function_decl)
27522 ix86_static_chain_on_stack = true;
27523 return gen_frame_mem (SImode,
27524 plus_constant (Pmode,
27525 arg_pointer_rtx, -8));
27531 return gen_rtx_REG (Pmode, regno);
27534 /* Emit RTL insns to initialize the variable parts of a trampoline.
27535 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27536 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27537 to be passed to the target function. */
/* NOTE(review): non-contiguous excerpt — several statements (offset
   updates, the 32-bit register-case setup) are elided.  */
27540 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27546 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27552 /* Load the function address to r11. Try to load address using
27553 the shorter movl instead of movabs. We may want to support
27554 movq for kernel mode, but kernel does not use trampolines at
27555 the moment. FNADDR is a 32bit address and may not be in
27556 DImode when ptr_mode == SImode. Always use movl in this
27558 if (ptr_mode == SImode
27559 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27561 fnaddr = copy_addr_to_reg (fnaddr);
/* 0xbb41 encodes "movl $imm32, %r11d" (REX.B + mov r/m opcode).  */
27563 mem = adjust_address (m_tramp, HImode, offset);
27564 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27566 mem = adjust_address (m_tramp, SImode, offset + 2);
27567 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
/* 0xbb49 encodes "movabs $imm64, %r11".  */
27572 mem = adjust_address (m_tramp, HImode, offset);
27573 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27575 mem = adjust_address (m_tramp, DImode, offset + 2);
27576 emit_move_insn (mem, fnaddr);
27580 /* Load static chain using movabs to r10. Use the shorter movl
27581 instead of movabs when ptr_mode == SImode. */
27582 if (ptr_mode == SImode)
27593 mem = adjust_address (m_tramp, HImode, offset);
27594 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27596 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27597 emit_move_insn (mem, chain_value);
27600 /* Jump to r11; the last (unused) byte is a nop, only there to
27601 pad the write out to a single 32-bit store. */
27602 mem = adjust_address (m_tramp, SImode, offset);
27603 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27610 /* Depending on the static chain location, either load a register
27611 with a constant, or push the constant to the stack. All of the
27612 instructions are the same size. */
27613 chain = ix86_static_chain (fndecl, true);
27616 switch (REGNO (chain))
/* 0xb8/0xb9 encode "movl $imm32, %eax" / "movl $imm32, %ecx".  */
27619 opcode = 0xb8; break;
27621 opcode = 0xb9; break;
27623 gcc_unreachable ();
27629 mem = adjust_address (m_tramp, QImode, offset);
27630 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27632 mem = adjust_address (m_tramp, SImode, offset + 1);
27633 emit_move_insn (mem, chain_value);
/* 0xe9 is the rel32 jmp opcode.  */
27636 mem = adjust_address (m_tramp, QImode, offset);
27637 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27639 mem = adjust_address (m_tramp, SImode, offset + 1);
27641 /* Compute offset from the end of the jmp to the target function.
27642 In the case in which the trampoline stores the static chain on
27643 the stack, we need to skip the first insn which pushes the
27644 (call-saved) register static chain; this push is 1 byte. */
27646 disp = expand_binop (SImode, sub_optab, fnaddr,
27647 plus_constant (Pmode, XEXP (m_tramp, 0),
27648 offset - (MEM_P (chain) ? 1 : 0)),
27649 NULL_RTX, 1, OPTAB_DIRECT);
27650 emit_move_insn (mem, disp);
27653 gcc_assert (offset <= TRAMPOLINE_SIZE);
27655 #ifdef HAVE_ENABLE_EXECUTE_STACK
27656 #ifdef CHECK_EXECUTE_STACK_ENABLED
27657 if (CHECK_EXECUTE_STACK_ENABLED)
/* Ask the runtime to make the trampoline's stack page executable.  */
27659 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27660 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27664 /* The following file contains several enumerations and data structures
27665 built from the definitions in i386-builtin-types.def. */
27667 #include "i386-builtin-types.inc"
27669 /* Table for the ix86 builtin non-function types. */
27670 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27672 /* Retrieve an element from the above table, building some of
27673 the types lazily. */
/* NOTE(review): non-contiguous excerpt — some statements (early return
   on cache hit, quals setup) are elided.  */
27676 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27678 unsigned int index;
27681 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
/* Check the memoization table first.  */
27683 type = ix86_builtin_type_tab[(int) tcode];
27687 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27688 if (tcode <= IX86_BT_LAST_VECT)
/* Vector type: build from its element type and machine mode.  */
27692 index = tcode - IX86_BT_LAST_PRIM - 1;
27693 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27694 mode = ix86_builtin_type_vect_mode[index];
27696 type = build_vector_type_for_mode (itype, mode);
/* Pointer type: codes past IX86_BT_LAST_PTR are const-qualified.  */
27702 index = tcode - IX86_BT_LAST_VECT - 1;
27703 if (tcode <= IX86_BT_LAST_PTR)
27704 quals = TYPE_UNQUALIFIED;
27706 quals = TYPE_QUAL_CONST;
27708 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27709 if (quals != TYPE_UNQUALIFIED)
27710 itype = build_qualified_type (itype, quals);
27712 type = build_pointer_type (itype);
/* Cache the freshly built type for subsequent lookups.  */
27715 ix86_builtin_type_tab[(int) tcode] = type;
27719 /* Table for the ix86 builtin function types. */
27720 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27722 /* Retrieve an element from the above table, building some of
27723 the types lazily. */
/* NOTE(review): non-contiguous excerpt — the cache-hit early return is
   elided.  */
27726 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27730 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27732 type = ix86_builtin_func_type_tab[(int) tcode];
27736 if (tcode <= IX86_BT_LAST_FUNC)
/* Direct function type: entry [start] is the return type, the rest
   are the argument types, consed in reverse onto void_list_node.  */
27738 unsigned start = ix86_builtin_func_start[(int) tcode];
27739 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27740 tree rtype, atype, args = void_list_node;
27743 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27744 for (i = after - 1; i > start; --i)
27746 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27747 args = tree_cons (NULL, atype, args);
27750 type = build_function_type (rtype, args);
/* Alias: resolve recursively to the aliased function type.  */
27754 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27755 enum ix86_builtin_func_type icode;
27757 icode = ix86_builtin_func_alias_base[index];
27758 type = ix86_get_builtin_func_type (icode);
/* Memoize the result.  */
27761 ix86_builtin_func_type_tab[(int) tcode] = type;
27766 /* Codes for all the SSE/MMX builtins. */
27769 IX86_BUILTIN_ADDPS,
27770 IX86_BUILTIN_ADDSS,
27771 IX86_BUILTIN_DIVPS,
27772 IX86_BUILTIN_DIVSS,
27773 IX86_BUILTIN_MULPS,
27774 IX86_BUILTIN_MULSS,
27775 IX86_BUILTIN_SUBPS,
27776 IX86_BUILTIN_SUBSS,
27778 IX86_BUILTIN_CMPEQPS,
27779 IX86_BUILTIN_CMPLTPS,
27780 IX86_BUILTIN_CMPLEPS,
27781 IX86_BUILTIN_CMPGTPS,
27782 IX86_BUILTIN_CMPGEPS,
27783 IX86_BUILTIN_CMPNEQPS,
27784 IX86_BUILTIN_CMPNLTPS,
27785 IX86_BUILTIN_CMPNLEPS,
27786 IX86_BUILTIN_CMPNGTPS,
27787 IX86_BUILTIN_CMPNGEPS,
27788 IX86_BUILTIN_CMPORDPS,
27789 IX86_BUILTIN_CMPUNORDPS,
27790 IX86_BUILTIN_CMPEQSS,
27791 IX86_BUILTIN_CMPLTSS,
27792 IX86_BUILTIN_CMPLESS,
27793 IX86_BUILTIN_CMPNEQSS,
27794 IX86_BUILTIN_CMPNLTSS,
27795 IX86_BUILTIN_CMPNLESS,
27796 IX86_BUILTIN_CMPORDSS,
27797 IX86_BUILTIN_CMPUNORDSS,
27799 IX86_BUILTIN_COMIEQSS,
27800 IX86_BUILTIN_COMILTSS,
27801 IX86_BUILTIN_COMILESS,
27802 IX86_BUILTIN_COMIGTSS,
27803 IX86_BUILTIN_COMIGESS,
27804 IX86_BUILTIN_COMINEQSS,
27805 IX86_BUILTIN_UCOMIEQSS,
27806 IX86_BUILTIN_UCOMILTSS,
27807 IX86_BUILTIN_UCOMILESS,
27808 IX86_BUILTIN_UCOMIGTSS,
27809 IX86_BUILTIN_UCOMIGESS,
27810 IX86_BUILTIN_UCOMINEQSS,
27812 IX86_BUILTIN_CVTPI2PS,
27813 IX86_BUILTIN_CVTPS2PI,
27814 IX86_BUILTIN_CVTSI2SS,
27815 IX86_BUILTIN_CVTSI642SS,
27816 IX86_BUILTIN_CVTSS2SI,
27817 IX86_BUILTIN_CVTSS2SI64,
27818 IX86_BUILTIN_CVTTPS2PI,
27819 IX86_BUILTIN_CVTTSS2SI,
27820 IX86_BUILTIN_CVTTSS2SI64,
27822 IX86_BUILTIN_MAXPS,
27823 IX86_BUILTIN_MAXSS,
27824 IX86_BUILTIN_MINPS,
27825 IX86_BUILTIN_MINSS,
27827 IX86_BUILTIN_LOADUPS,
27828 IX86_BUILTIN_STOREUPS,
27829 IX86_BUILTIN_MOVSS,
27831 IX86_BUILTIN_MOVHLPS,
27832 IX86_BUILTIN_MOVLHPS,
27833 IX86_BUILTIN_LOADHPS,
27834 IX86_BUILTIN_LOADLPS,
27835 IX86_BUILTIN_STOREHPS,
27836 IX86_BUILTIN_STORELPS,
27838 IX86_BUILTIN_MASKMOVQ,
27839 IX86_BUILTIN_MOVMSKPS,
27840 IX86_BUILTIN_PMOVMSKB,
27842 IX86_BUILTIN_MOVNTPS,
27843 IX86_BUILTIN_MOVNTQ,
27845 IX86_BUILTIN_LOADDQU,
27846 IX86_BUILTIN_STOREDQU,
27848 IX86_BUILTIN_PACKSSWB,
27849 IX86_BUILTIN_PACKSSDW,
27850 IX86_BUILTIN_PACKUSWB,
27852 IX86_BUILTIN_PADDB,
27853 IX86_BUILTIN_PADDW,
27854 IX86_BUILTIN_PADDD,
27855 IX86_BUILTIN_PADDQ,
27856 IX86_BUILTIN_PADDSB,
27857 IX86_BUILTIN_PADDSW,
27858 IX86_BUILTIN_PADDUSB,
27859 IX86_BUILTIN_PADDUSW,
27860 IX86_BUILTIN_PSUBB,
27861 IX86_BUILTIN_PSUBW,
27862 IX86_BUILTIN_PSUBD,
27863 IX86_BUILTIN_PSUBQ,
27864 IX86_BUILTIN_PSUBSB,
27865 IX86_BUILTIN_PSUBSW,
27866 IX86_BUILTIN_PSUBUSB,
27867 IX86_BUILTIN_PSUBUSW,
27870 IX86_BUILTIN_PANDN,
27874 IX86_BUILTIN_PAVGB,
27875 IX86_BUILTIN_PAVGW,
27877 IX86_BUILTIN_PCMPEQB,
27878 IX86_BUILTIN_PCMPEQW,
27879 IX86_BUILTIN_PCMPEQD,
27880 IX86_BUILTIN_PCMPGTB,
27881 IX86_BUILTIN_PCMPGTW,
27882 IX86_BUILTIN_PCMPGTD,
27884 IX86_BUILTIN_PMADDWD,
27886 IX86_BUILTIN_PMAXSW,
27887 IX86_BUILTIN_PMAXUB,
27888 IX86_BUILTIN_PMINSW,
27889 IX86_BUILTIN_PMINUB,
27891 IX86_BUILTIN_PMULHUW,
27892 IX86_BUILTIN_PMULHW,
27893 IX86_BUILTIN_PMULLW,
27895 IX86_BUILTIN_PSADBW,
27896 IX86_BUILTIN_PSHUFW,
27898 IX86_BUILTIN_PSLLW,
27899 IX86_BUILTIN_PSLLD,
27900 IX86_BUILTIN_PSLLQ,
27901 IX86_BUILTIN_PSRAW,
27902 IX86_BUILTIN_PSRAD,
27903 IX86_BUILTIN_PSRLW,
27904 IX86_BUILTIN_PSRLD,
27905 IX86_BUILTIN_PSRLQ,
27906 IX86_BUILTIN_PSLLWI,
27907 IX86_BUILTIN_PSLLDI,
27908 IX86_BUILTIN_PSLLQI,
27909 IX86_BUILTIN_PSRAWI,
27910 IX86_BUILTIN_PSRADI,
27911 IX86_BUILTIN_PSRLWI,
27912 IX86_BUILTIN_PSRLDI,
27913 IX86_BUILTIN_PSRLQI,
27915 IX86_BUILTIN_PUNPCKHBW,
27916 IX86_BUILTIN_PUNPCKHWD,
27917 IX86_BUILTIN_PUNPCKHDQ,
27918 IX86_BUILTIN_PUNPCKLBW,
27919 IX86_BUILTIN_PUNPCKLWD,
27920 IX86_BUILTIN_PUNPCKLDQ,
27922 IX86_BUILTIN_SHUFPS,
27924 IX86_BUILTIN_RCPPS,
27925 IX86_BUILTIN_RCPSS,
27926 IX86_BUILTIN_RSQRTPS,
27927 IX86_BUILTIN_RSQRTPS_NR,
27928 IX86_BUILTIN_RSQRTSS,
27929 IX86_BUILTIN_RSQRTF,
27930 IX86_BUILTIN_SQRTPS,
27931 IX86_BUILTIN_SQRTPS_NR,
27932 IX86_BUILTIN_SQRTSS,
27934 IX86_BUILTIN_UNPCKHPS,
27935 IX86_BUILTIN_UNPCKLPS,
27937 IX86_BUILTIN_ANDPS,
27938 IX86_BUILTIN_ANDNPS,
27940 IX86_BUILTIN_XORPS,
27943 IX86_BUILTIN_LDMXCSR,
27944 IX86_BUILTIN_STMXCSR,
27945 IX86_BUILTIN_SFENCE,
27947 IX86_BUILTIN_FXSAVE,
27948 IX86_BUILTIN_FXRSTOR,
27949 IX86_BUILTIN_FXSAVE64,
27950 IX86_BUILTIN_FXRSTOR64,
27952 IX86_BUILTIN_XSAVE,
27953 IX86_BUILTIN_XRSTOR,
27954 IX86_BUILTIN_XSAVE64,
27955 IX86_BUILTIN_XRSTOR64,
27957 IX86_BUILTIN_XSAVEOPT,
27958 IX86_BUILTIN_XSAVEOPT64,
27960 IX86_BUILTIN_XSAVEC,
27961 IX86_BUILTIN_XSAVEC64,
27963 IX86_BUILTIN_XSAVES,
27964 IX86_BUILTIN_XRSTORS,
27965 IX86_BUILTIN_XSAVES64,
27966 IX86_BUILTIN_XRSTORS64,
27968 /* 3DNow! Original */
27969 IX86_BUILTIN_FEMMS,
27970 IX86_BUILTIN_PAVGUSB,
27971 IX86_BUILTIN_PF2ID,
27972 IX86_BUILTIN_PFACC,
27973 IX86_BUILTIN_PFADD,
27974 IX86_BUILTIN_PFCMPEQ,
27975 IX86_BUILTIN_PFCMPGE,
27976 IX86_BUILTIN_PFCMPGT,
27977 IX86_BUILTIN_PFMAX,
27978 IX86_BUILTIN_PFMIN,
27979 IX86_BUILTIN_PFMUL,
27980 IX86_BUILTIN_PFRCP,
27981 IX86_BUILTIN_PFRCPIT1,
27982 IX86_BUILTIN_PFRCPIT2,
27983 IX86_BUILTIN_PFRSQIT1,
27984 IX86_BUILTIN_PFRSQRT,
27985 IX86_BUILTIN_PFSUB,
27986 IX86_BUILTIN_PFSUBR,
27987 IX86_BUILTIN_PI2FD,
27988 IX86_BUILTIN_PMULHRW,
27990 /* 3DNow! Athlon Extensions */
27991 IX86_BUILTIN_PF2IW,
27992 IX86_BUILTIN_PFNACC,
27993 IX86_BUILTIN_PFPNACC,
27994 IX86_BUILTIN_PI2FW,
27995 IX86_BUILTIN_PSWAPDSI,
27996 IX86_BUILTIN_PSWAPDSF,
27999 IX86_BUILTIN_ADDPD,
28000 IX86_BUILTIN_ADDSD,
28001 IX86_BUILTIN_DIVPD,
28002 IX86_BUILTIN_DIVSD,
28003 IX86_BUILTIN_MULPD,
28004 IX86_BUILTIN_MULSD,
28005 IX86_BUILTIN_SUBPD,
28006 IX86_BUILTIN_SUBSD,
28008 IX86_BUILTIN_CMPEQPD,
28009 IX86_BUILTIN_CMPLTPD,
28010 IX86_BUILTIN_CMPLEPD,
28011 IX86_BUILTIN_CMPGTPD,
28012 IX86_BUILTIN_CMPGEPD,
28013 IX86_BUILTIN_CMPNEQPD,
28014 IX86_BUILTIN_CMPNLTPD,
28015 IX86_BUILTIN_CMPNLEPD,
28016 IX86_BUILTIN_CMPNGTPD,
28017 IX86_BUILTIN_CMPNGEPD,
28018 IX86_BUILTIN_CMPORDPD,
28019 IX86_BUILTIN_CMPUNORDPD,
28020 IX86_BUILTIN_CMPEQSD,
28021 IX86_BUILTIN_CMPLTSD,
28022 IX86_BUILTIN_CMPLESD,
28023 IX86_BUILTIN_CMPNEQSD,
28024 IX86_BUILTIN_CMPNLTSD,
28025 IX86_BUILTIN_CMPNLESD,
28026 IX86_BUILTIN_CMPORDSD,
28027 IX86_BUILTIN_CMPUNORDSD,
28029 IX86_BUILTIN_COMIEQSD,
28030 IX86_BUILTIN_COMILTSD,
28031 IX86_BUILTIN_COMILESD,
28032 IX86_BUILTIN_COMIGTSD,
28033 IX86_BUILTIN_COMIGESD,
28034 IX86_BUILTIN_COMINEQSD,
28035 IX86_BUILTIN_UCOMIEQSD,
28036 IX86_BUILTIN_UCOMILTSD,
28037 IX86_BUILTIN_UCOMILESD,
28038 IX86_BUILTIN_UCOMIGTSD,
28039 IX86_BUILTIN_UCOMIGESD,
28040 IX86_BUILTIN_UCOMINEQSD,
28042 IX86_BUILTIN_MAXPD,
28043 IX86_BUILTIN_MAXSD,
28044 IX86_BUILTIN_MINPD,
28045 IX86_BUILTIN_MINSD,
28047 IX86_BUILTIN_ANDPD,
28048 IX86_BUILTIN_ANDNPD,
28050 IX86_BUILTIN_XORPD,
28052 IX86_BUILTIN_SQRTPD,
28053 IX86_BUILTIN_SQRTSD,
28055 IX86_BUILTIN_UNPCKHPD,
28056 IX86_BUILTIN_UNPCKLPD,
28058 IX86_BUILTIN_SHUFPD,
28060 IX86_BUILTIN_LOADUPD,
28061 IX86_BUILTIN_STOREUPD,
28062 IX86_BUILTIN_MOVSD,
28064 IX86_BUILTIN_LOADHPD,
28065 IX86_BUILTIN_LOADLPD,
28067 IX86_BUILTIN_CVTDQ2PD,
28068 IX86_BUILTIN_CVTDQ2PS,
28070 IX86_BUILTIN_CVTPD2DQ,
28071 IX86_BUILTIN_CVTPD2PI,
28072 IX86_BUILTIN_CVTPD2PS,
28073 IX86_BUILTIN_CVTTPD2DQ,
28074 IX86_BUILTIN_CVTTPD2PI,
28076 IX86_BUILTIN_CVTPI2PD,
28077 IX86_BUILTIN_CVTSI2SD,
28078 IX86_BUILTIN_CVTSI642SD,
28080 IX86_BUILTIN_CVTSD2SI,
28081 IX86_BUILTIN_CVTSD2SI64,
28082 IX86_BUILTIN_CVTSD2SS,
28083 IX86_BUILTIN_CVTSS2SD,
28084 IX86_BUILTIN_CVTTSD2SI,
28085 IX86_BUILTIN_CVTTSD2SI64,
28087 IX86_BUILTIN_CVTPS2DQ,
28088 IX86_BUILTIN_CVTPS2PD,
28089 IX86_BUILTIN_CVTTPS2DQ,
28091 IX86_BUILTIN_MOVNTI,
28092 IX86_BUILTIN_MOVNTI64,
28093 IX86_BUILTIN_MOVNTPD,
28094 IX86_BUILTIN_MOVNTDQ,
28096 IX86_BUILTIN_MOVQ128,
28099 IX86_BUILTIN_MASKMOVDQU,
28100 IX86_BUILTIN_MOVMSKPD,
28101 IX86_BUILTIN_PMOVMSKB128,
28103 IX86_BUILTIN_PACKSSWB128,
28104 IX86_BUILTIN_PACKSSDW128,
28105 IX86_BUILTIN_PACKUSWB128,
28107 IX86_BUILTIN_PADDB128,
28108 IX86_BUILTIN_PADDW128,
28109 IX86_BUILTIN_PADDD128,
28110 IX86_BUILTIN_PADDQ128,
28111 IX86_BUILTIN_PADDSB128,
28112 IX86_BUILTIN_PADDSW128,
28113 IX86_BUILTIN_PADDUSB128,
28114 IX86_BUILTIN_PADDUSW128,
28115 IX86_BUILTIN_PSUBB128,
28116 IX86_BUILTIN_PSUBW128,
28117 IX86_BUILTIN_PSUBD128,
28118 IX86_BUILTIN_PSUBQ128,
28119 IX86_BUILTIN_PSUBSB128,
28120 IX86_BUILTIN_PSUBSW128,
28121 IX86_BUILTIN_PSUBUSB128,
28122 IX86_BUILTIN_PSUBUSW128,
28124 IX86_BUILTIN_PAND128,
28125 IX86_BUILTIN_PANDN128,
28126 IX86_BUILTIN_POR128,
28127 IX86_BUILTIN_PXOR128,
28129 IX86_BUILTIN_PAVGB128,
28130 IX86_BUILTIN_PAVGW128,
28132 IX86_BUILTIN_PCMPEQB128,
28133 IX86_BUILTIN_PCMPEQW128,
28134 IX86_BUILTIN_PCMPEQD128,
28135 IX86_BUILTIN_PCMPGTB128,
28136 IX86_BUILTIN_PCMPGTW128,
28137 IX86_BUILTIN_PCMPGTD128,
28139 IX86_BUILTIN_PMADDWD128,
28141 IX86_BUILTIN_PMAXSW128,
28142 IX86_BUILTIN_PMAXUB128,
28143 IX86_BUILTIN_PMINSW128,
28144 IX86_BUILTIN_PMINUB128,
28146 IX86_BUILTIN_PMULUDQ,
28147 IX86_BUILTIN_PMULUDQ128,
28148 IX86_BUILTIN_PMULHUW128,
28149 IX86_BUILTIN_PMULHW128,
28150 IX86_BUILTIN_PMULLW128,
28152 IX86_BUILTIN_PSADBW128,
28153 IX86_BUILTIN_PSHUFHW,
28154 IX86_BUILTIN_PSHUFLW,
28155 IX86_BUILTIN_PSHUFD,
28157 IX86_BUILTIN_PSLLDQI128,
28158 IX86_BUILTIN_PSLLWI128,
28159 IX86_BUILTIN_PSLLDI128,
28160 IX86_BUILTIN_PSLLQI128,
28161 IX86_BUILTIN_PSRAWI128,
28162 IX86_BUILTIN_PSRADI128,
28163 IX86_BUILTIN_PSRLDQI128,
28164 IX86_BUILTIN_PSRLWI128,
28165 IX86_BUILTIN_PSRLDI128,
28166 IX86_BUILTIN_PSRLQI128,
28168 IX86_BUILTIN_PSLLDQ128,
28169 IX86_BUILTIN_PSLLW128,
28170 IX86_BUILTIN_PSLLD128,
28171 IX86_BUILTIN_PSLLQ128,
28172 IX86_BUILTIN_PSRAW128,
28173 IX86_BUILTIN_PSRAD128,
28174 IX86_BUILTIN_PSRLW128,
28175 IX86_BUILTIN_PSRLD128,
28176 IX86_BUILTIN_PSRLQ128,
28178 IX86_BUILTIN_PUNPCKHBW128,
28179 IX86_BUILTIN_PUNPCKHWD128,
28180 IX86_BUILTIN_PUNPCKHDQ128,
28181 IX86_BUILTIN_PUNPCKHQDQ128,
28182 IX86_BUILTIN_PUNPCKLBW128,
28183 IX86_BUILTIN_PUNPCKLWD128,
28184 IX86_BUILTIN_PUNPCKLDQ128,
28185 IX86_BUILTIN_PUNPCKLQDQ128,
28187 IX86_BUILTIN_CLFLUSH,
28188 IX86_BUILTIN_MFENCE,
28189 IX86_BUILTIN_LFENCE,
28190 IX86_BUILTIN_PAUSE,
28192 IX86_BUILTIN_FNSTENV,
28193 IX86_BUILTIN_FLDENV,
28194 IX86_BUILTIN_FNSTSW,
28195 IX86_BUILTIN_FNCLEX,
28197 IX86_BUILTIN_BSRSI,
28198 IX86_BUILTIN_BSRDI,
28199 IX86_BUILTIN_RDPMC,
28200 IX86_BUILTIN_RDTSC,
28201 IX86_BUILTIN_RDTSCP,
28202 IX86_BUILTIN_ROLQI,
28203 IX86_BUILTIN_ROLHI,
28204 IX86_BUILTIN_RORQI,
28205 IX86_BUILTIN_RORHI,
28208 IX86_BUILTIN_ADDSUBPS,
28209 IX86_BUILTIN_HADDPS,
28210 IX86_BUILTIN_HSUBPS,
28211 IX86_BUILTIN_MOVSHDUP,
28212 IX86_BUILTIN_MOVSLDUP,
28213 IX86_BUILTIN_ADDSUBPD,
28214 IX86_BUILTIN_HADDPD,
28215 IX86_BUILTIN_HSUBPD,
28216 IX86_BUILTIN_LDDQU,
28218 IX86_BUILTIN_MONITOR,
28219 IX86_BUILTIN_MWAIT,
28222 IX86_BUILTIN_PHADDW,
28223 IX86_BUILTIN_PHADDD,
28224 IX86_BUILTIN_PHADDSW,
28225 IX86_BUILTIN_PHSUBW,
28226 IX86_BUILTIN_PHSUBD,
28227 IX86_BUILTIN_PHSUBSW,
28228 IX86_BUILTIN_PMADDUBSW,
28229 IX86_BUILTIN_PMULHRSW,
28230 IX86_BUILTIN_PSHUFB,
28231 IX86_BUILTIN_PSIGNB,
28232 IX86_BUILTIN_PSIGNW,
28233 IX86_BUILTIN_PSIGND,
28234 IX86_BUILTIN_PALIGNR,
28235 IX86_BUILTIN_PABSB,
28236 IX86_BUILTIN_PABSW,
28237 IX86_BUILTIN_PABSD,
28239 IX86_BUILTIN_PHADDW128,
28240 IX86_BUILTIN_PHADDD128,
28241 IX86_BUILTIN_PHADDSW128,
28242 IX86_BUILTIN_PHSUBW128,
28243 IX86_BUILTIN_PHSUBD128,
28244 IX86_BUILTIN_PHSUBSW128,
28245 IX86_BUILTIN_PMADDUBSW128,
28246 IX86_BUILTIN_PMULHRSW128,
28247 IX86_BUILTIN_PSHUFB128,
28248 IX86_BUILTIN_PSIGNB128,
28249 IX86_BUILTIN_PSIGNW128,
28250 IX86_BUILTIN_PSIGND128,
28251 IX86_BUILTIN_PALIGNR128,
28252 IX86_BUILTIN_PABSB128,
28253 IX86_BUILTIN_PABSW128,
28254 IX86_BUILTIN_PABSD128,
28256 /* AMDFAM10 - SSE4A New Instructions. */
28257 IX86_BUILTIN_MOVNTSD,
28258 IX86_BUILTIN_MOVNTSS,
28259 IX86_BUILTIN_EXTRQI,
28260 IX86_BUILTIN_EXTRQ,
28261 IX86_BUILTIN_INSERTQI,
28262 IX86_BUILTIN_INSERTQ,
28265 IX86_BUILTIN_BLENDPD,
28266 IX86_BUILTIN_BLENDPS,
28267 IX86_BUILTIN_BLENDVPD,
28268 IX86_BUILTIN_BLENDVPS,
28269 IX86_BUILTIN_PBLENDVB128,
28270 IX86_BUILTIN_PBLENDW128,
28275 IX86_BUILTIN_INSERTPS128,
28277 IX86_BUILTIN_MOVNTDQA,
28278 IX86_BUILTIN_MPSADBW128,
28279 IX86_BUILTIN_PACKUSDW128,
28280 IX86_BUILTIN_PCMPEQQ,
28281 IX86_BUILTIN_PHMINPOSUW128,
28283 IX86_BUILTIN_PMAXSB128,
28284 IX86_BUILTIN_PMAXSD128,
28285 IX86_BUILTIN_PMAXUD128,
28286 IX86_BUILTIN_PMAXUW128,
28288 IX86_BUILTIN_PMINSB128,
28289 IX86_BUILTIN_PMINSD128,
28290 IX86_BUILTIN_PMINUD128,
28291 IX86_BUILTIN_PMINUW128,
28293 IX86_BUILTIN_PMOVSXBW128,
28294 IX86_BUILTIN_PMOVSXBD128,
28295 IX86_BUILTIN_PMOVSXBQ128,
28296 IX86_BUILTIN_PMOVSXWD128,
28297 IX86_BUILTIN_PMOVSXWQ128,
28298 IX86_BUILTIN_PMOVSXDQ128,
28300 IX86_BUILTIN_PMOVZXBW128,
28301 IX86_BUILTIN_PMOVZXBD128,
28302 IX86_BUILTIN_PMOVZXBQ128,
28303 IX86_BUILTIN_PMOVZXWD128,
28304 IX86_BUILTIN_PMOVZXWQ128,
28305 IX86_BUILTIN_PMOVZXDQ128,
28307 IX86_BUILTIN_PMULDQ128,
28308 IX86_BUILTIN_PMULLD128,
28310 IX86_BUILTIN_ROUNDSD,
28311 IX86_BUILTIN_ROUNDSS,
28313 IX86_BUILTIN_ROUNDPD,
28314 IX86_BUILTIN_ROUNDPS,
28316 IX86_BUILTIN_FLOORPD,
28317 IX86_BUILTIN_CEILPD,
28318 IX86_BUILTIN_TRUNCPD,
28319 IX86_BUILTIN_RINTPD,
28320 IX86_BUILTIN_ROUNDPD_AZ,
28322 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28323 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28324 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28326 IX86_BUILTIN_FLOORPS,
28327 IX86_BUILTIN_CEILPS,
28328 IX86_BUILTIN_TRUNCPS,
28329 IX86_BUILTIN_RINTPS,
28330 IX86_BUILTIN_ROUNDPS_AZ,
28332 IX86_BUILTIN_FLOORPS_SFIX,
28333 IX86_BUILTIN_CEILPS_SFIX,
28334 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28336 IX86_BUILTIN_PTESTZ,
28337 IX86_BUILTIN_PTESTC,
28338 IX86_BUILTIN_PTESTNZC,
28340 IX86_BUILTIN_VEC_INIT_V2SI,
28341 IX86_BUILTIN_VEC_INIT_V4HI,
28342 IX86_BUILTIN_VEC_INIT_V8QI,
28343 IX86_BUILTIN_VEC_EXT_V2DF,
28344 IX86_BUILTIN_VEC_EXT_V2DI,
28345 IX86_BUILTIN_VEC_EXT_V4SF,
28346 IX86_BUILTIN_VEC_EXT_V4SI,
28347 IX86_BUILTIN_VEC_EXT_V8HI,
28348 IX86_BUILTIN_VEC_EXT_V2SI,
28349 IX86_BUILTIN_VEC_EXT_V4HI,
28350 IX86_BUILTIN_VEC_EXT_V16QI,
28351 IX86_BUILTIN_VEC_SET_V2DI,
28352 IX86_BUILTIN_VEC_SET_V4SF,
28353 IX86_BUILTIN_VEC_SET_V4SI,
28354 IX86_BUILTIN_VEC_SET_V8HI,
28355 IX86_BUILTIN_VEC_SET_V4HI,
28356 IX86_BUILTIN_VEC_SET_V16QI,
28358 IX86_BUILTIN_VEC_PACK_SFIX,
28359 IX86_BUILTIN_VEC_PACK_SFIX256,
28362 IX86_BUILTIN_CRC32QI,
28363 IX86_BUILTIN_CRC32HI,
28364 IX86_BUILTIN_CRC32SI,
28365 IX86_BUILTIN_CRC32DI,
28367 IX86_BUILTIN_PCMPESTRI128,
28368 IX86_BUILTIN_PCMPESTRM128,
28369 IX86_BUILTIN_PCMPESTRA128,
28370 IX86_BUILTIN_PCMPESTRC128,
28371 IX86_BUILTIN_PCMPESTRO128,
28372 IX86_BUILTIN_PCMPESTRS128,
28373 IX86_BUILTIN_PCMPESTRZ128,
28374 IX86_BUILTIN_PCMPISTRI128,
28375 IX86_BUILTIN_PCMPISTRM128,
28376 IX86_BUILTIN_PCMPISTRA128,
28377 IX86_BUILTIN_PCMPISTRC128,
28378 IX86_BUILTIN_PCMPISTRO128,
28379 IX86_BUILTIN_PCMPISTRS128,
28380 IX86_BUILTIN_PCMPISTRZ128,
28382 IX86_BUILTIN_PCMPGTQ,
28384 /* AES instructions */
28385 IX86_BUILTIN_AESENC128,
28386 IX86_BUILTIN_AESENCLAST128,
28387 IX86_BUILTIN_AESDEC128,
28388 IX86_BUILTIN_AESDECLAST128,
28389 IX86_BUILTIN_AESIMC128,
28390 IX86_BUILTIN_AESKEYGENASSIST128,
28392 /* PCLMUL instruction */
28393 IX86_BUILTIN_PCLMULQDQ128,
28396 IX86_BUILTIN_ADDPD256,
28397 IX86_BUILTIN_ADDPS256,
28398 IX86_BUILTIN_ADDSUBPD256,
28399 IX86_BUILTIN_ADDSUBPS256,
28400 IX86_BUILTIN_ANDPD256,
28401 IX86_BUILTIN_ANDPS256,
28402 IX86_BUILTIN_ANDNPD256,
28403 IX86_BUILTIN_ANDNPS256,
28404 IX86_BUILTIN_BLENDPD256,
28405 IX86_BUILTIN_BLENDPS256,
28406 IX86_BUILTIN_BLENDVPD256,
28407 IX86_BUILTIN_BLENDVPS256,
28408 IX86_BUILTIN_DIVPD256,
28409 IX86_BUILTIN_DIVPS256,
28410 IX86_BUILTIN_DPPS256,
28411 IX86_BUILTIN_HADDPD256,
28412 IX86_BUILTIN_HADDPS256,
28413 IX86_BUILTIN_HSUBPD256,
28414 IX86_BUILTIN_HSUBPS256,
28415 IX86_BUILTIN_MAXPD256,
28416 IX86_BUILTIN_MAXPS256,
28417 IX86_BUILTIN_MINPD256,
28418 IX86_BUILTIN_MINPS256,
28419 IX86_BUILTIN_MULPD256,
28420 IX86_BUILTIN_MULPS256,
28421 IX86_BUILTIN_ORPD256,
28422 IX86_BUILTIN_ORPS256,
28423 IX86_BUILTIN_SHUFPD256,
28424 IX86_BUILTIN_SHUFPS256,
28425 IX86_BUILTIN_SUBPD256,
28426 IX86_BUILTIN_SUBPS256,
28427 IX86_BUILTIN_XORPD256,
28428 IX86_BUILTIN_XORPS256,
28429 IX86_BUILTIN_CMPSD,
28430 IX86_BUILTIN_CMPSS,
28431 IX86_BUILTIN_CMPPD,
28432 IX86_BUILTIN_CMPPS,
28433 IX86_BUILTIN_CMPPD256,
28434 IX86_BUILTIN_CMPPS256,
28435 IX86_BUILTIN_CVTDQ2PD256,
28436 IX86_BUILTIN_CVTDQ2PS256,
28437 IX86_BUILTIN_CVTPD2PS256,
28438 IX86_BUILTIN_CVTPS2DQ256,
28439 IX86_BUILTIN_CVTPS2PD256,
28440 IX86_BUILTIN_CVTTPD2DQ256,
28441 IX86_BUILTIN_CVTPD2DQ256,
28442 IX86_BUILTIN_CVTTPS2DQ256,
28443 IX86_BUILTIN_EXTRACTF128PD256,
28444 IX86_BUILTIN_EXTRACTF128PS256,
28445 IX86_BUILTIN_EXTRACTF128SI256,
28446 IX86_BUILTIN_VZEROALL,
28447 IX86_BUILTIN_VZEROUPPER,
28448 IX86_BUILTIN_VPERMILVARPD,
28449 IX86_BUILTIN_VPERMILVARPS,
28450 IX86_BUILTIN_VPERMILVARPD256,
28451 IX86_BUILTIN_VPERMILVARPS256,
28452 IX86_BUILTIN_VPERMILPD,
28453 IX86_BUILTIN_VPERMILPS,
28454 IX86_BUILTIN_VPERMILPD256,
28455 IX86_BUILTIN_VPERMILPS256,
28456 IX86_BUILTIN_VPERMIL2PD,
28457 IX86_BUILTIN_VPERMIL2PS,
28458 IX86_BUILTIN_VPERMIL2PD256,
28459 IX86_BUILTIN_VPERMIL2PS256,
28460 IX86_BUILTIN_VPERM2F128PD256,
28461 IX86_BUILTIN_VPERM2F128PS256,
28462 IX86_BUILTIN_VPERM2F128SI256,
28463 IX86_BUILTIN_VBROADCASTSS,
28464 IX86_BUILTIN_VBROADCASTSD256,
28465 IX86_BUILTIN_VBROADCASTSS256,
28466 IX86_BUILTIN_VBROADCASTPD256,
28467 IX86_BUILTIN_VBROADCASTPS256,
28468 IX86_BUILTIN_VINSERTF128PD256,
28469 IX86_BUILTIN_VINSERTF128PS256,
28470 IX86_BUILTIN_VINSERTF128SI256,
28471 IX86_BUILTIN_LOADUPD256,
28472 IX86_BUILTIN_LOADUPS256,
28473 IX86_BUILTIN_STOREUPD256,
28474 IX86_BUILTIN_STOREUPS256,
28475 IX86_BUILTIN_LDDQU256,
28476 IX86_BUILTIN_MOVNTDQ256,
28477 IX86_BUILTIN_MOVNTPD256,
28478 IX86_BUILTIN_MOVNTPS256,
28479 IX86_BUILTIN_LOADDQU256,
28480 IX86_BUILTIN_STOREDQU256,
28481 IX86_BUILTIN_MASKLOADPD,
28482 IX86_BUILTIN_MASKLOADPS,
28483 IX86_BUILTIN_MASKSTOREPD,
28484 IX86_BUILTIN_MASKSTOREPS,
28485 IX86_BUILTIN_MASKLOADPD256,
28486 IX86_BUILTIN_MASKLOADPS256,
28487 IX86_BUILTIN_MASKSTOREPD256,
28488 IX86_BUILTIN_MASKSTOREPS256,
28489 IX86_BUILTIN_MOVSHDUP256,
28490 IX86_BUILTIN_MOVSLDUP256,
28491 IX86_BUILTIN_MOVDDUP256,
28493 IX86_BUILTIN_SQRTPD256,
28494 IX86_BUILTIN_SQRTPS256,
28495 IX86_BUILTIN_SQRTPS_NR256,
28496 IX86_BUILTIN_RSQRTPS256,
28497 IX86_BUILTIN_RSQRTPS_NR256,
28499 IX86_BUILTIN_RCPPS256,
28501 IX86_BUILTIN_ROUNDPD256,
28502 IX86_BUILTIN_ROUNDPS256,
28504 IX86_BUILTIN_FLOORPD256,
28505 IX86_BUILTIN_CEILPD256,
28506 IX86_BUILTIN_TRUNCPD256,
28507 IX86_BUILTIN_RINTPD256,
28508 IX86_BUILTIN_ROUNDPD_AZ256,
28510 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28511 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28512 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28514 IX86_BUILTIN_FLOORPS256,
28515 IX86_BUILTIN_CEILPS256,
28516 IX86_BUILTIN_TRUNCPS256,
28517 IX86_BUILTIN_RINTPS256,
28518 IX86_BUILTIN_ROUNDPS_AZ256,
28520 IX86_BUILTIN_FLOORPS_SFIX256,
28521 IX86_BUILTIN_CEILPS_SFIX256,
28522 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28524 IX86_BUILTIN_UNPCKHPD256,
28525 IX86_BUILTIN_UNPCKLPD256,
28526 IX86_BUILTIN_UNPCKHPS256,
28527 IX86_BUILTIN_UNPCKLPS256,
28529 IX86_BUILTIN_SI256_SI,
28530 IX86_BUILTIN_PS256_PS,
28531 IX86_BUILTIN_PD256_PD,
28532 IX86_BUILTIN_SI_SI256,
28533 IX86_BUILTIN_PS_PS256,
28534 IX86_BUILTIN_PD_PD256,
28536 IX86_BUILTIN_VTESTZPD,
28537 IX86_BUILTIN_VTESTCPD,
28538 IX86_BUILTIN_VTESTNZCPD,
28539 IX86_BUILTIN_VTESTZPS,
28540 IX86_BUILTIN_VTESTCPS,
28541 IX86_BUILTIN_VTESTNZCPS,
28542 IX86_BUILTIN_VTESTZPD256,
28543 IX86_BUILTIN_VTESTCPD256,
28544 IX86_BUILTIN_VTESTNZCPD256,
28545 IX86_BUILTIN_VTESTZPS256,
28546 IX86_BUILTIN_VTESTCPS256,
28547 IX86_BUILTIN_VTESTNZCPS256,
28548 IX86_BUILTIN_PTESTZ256,
28549 IX86_BUILTIN_PTESTC256,
28550 IX86_BUILTIN_PTESTNZC256,
28552 IX86_BUILTIN_MOVMSKPD256,
28553 IX86_BUILTIN_MOVMSKPS256,
28556 IX86_BUILTIN_MPSADBW256,
28557 IX86_BUILTIN_PABSB256,
28558 IX86_BUILTIN_PABSW256,
28559 IX86_BUILTIN_PABSD256,
28560 IX86_BUILTIN_PACKSSDW256,
28561 IX86_BUILTIN_PACKSSWB256,
28562 IX86_BUILTIN_PACKUSDW256,
28563 IX86_BUILTIN_PACKUSWB256,
28564 IX86_BUILTIN_PADDB256,
28565 IX86_BUILTIN_PADDW256,
28566 IX86_BUILTIN_PADDD256,
28567 IX86_BUILTIN_PADDQ256,
28568 IX86_BUILTIN_PADDSB256,
28569 IX86_BUILTIN_PADDSW256,
28570 IX86_BUILTIN_PADDUSB256,
28571 IX86_BUILTIN_PADDUSW256,
28572 IX86_BUILTIN_PALIGNR256,
28573 IX86_BUILTIN_AND256I,
28574 IX86_BUILTIN_ANDNOT256I,
28575 IX86_BUILTIN_PAVGB256,
28576 IX86_BUILTIN_PAVGW256,
28577 IX86_BUILTIN_PBLENDVB256,
28578 IX86_BUILTIN_PBLENDVW256,
28579 IX86_BUILTIN_PCMPEQB256,
28580 IX86_BUILTIN_PCMPEQW256,
28581 IX86_BUILTIN_PCMPEQD256,
28582 IX86_BUILTIN_PCMPEQQ256,
28583 IX86_BUILTIN_PCMPGTB256,
28584 IX86_BUILTIN_PCMPGTW256,
28585 IX86_BUILTIN_PCMPGTD256,
28586 IX86_BUILTIN_PCMPGTQ256,
28587 IX86_BUILTIN_PHADDW256,
28588 IX86_BUILTIN_PHADDD256,
28589 IX86_BUILTIN_PHADDSW256,
28590 IX86_BUILTIN_PHSUBW256,
28591 IX86_BUILTIN_PHSUBD256,
28592 IX86_BUILTIN_PHSUBSW256,
28593 IX86_BUILTIN_PMADDUBSW256,
28594 IX86_BUILTIN_PMADDWD256,
28595 IX86_BUILTIN_PMAXSB256,
28596 IX86_BUILTIN_PMAXSW256,
28597 IX86_BUILTIN_PMAXSD256,
28598 IX86_BUILTIN_PMAXUB256,
28599 IX86_BUILTIN_PMAXUW256,
28600 IX86_BUILTIN_PMAXUD256,
28601 IX86_BUILTIN_PMINSB256,
28602 IX86_BUILTIN_PMINSW256,
28603 IX86_BUILTIN_PMINSD256,
28604 IX86_BUILTIN_PMINUB256,
28605 IX86_BUILTIN_PMINUW256,
28606 IX86_BUILTIN_PMINUD256,
28607 IX86_BUILTIN_PMOVMSKB256,
28608 IX86_BUILTIN_PMOVSXBW256,
28609 IX86_BUILTIN_PMOVSXBD256,
28610 IX86_BUILTIN_PMOVSXBQ256,
28611 IX86_BUILTIN_PMOVSXWD256,
28612 IX86_BUILTIN_PMOVSXWQ256,
28613 IX86_BUILTIN_PMOVSXDQ256,
28614 IX86_BUILTIN_PMOVZXBW256,
28615 IX86_BUILTIN_PMOVZXBD256,
28616 IX86_BUILTIN_PMOVZXBQ256,
28617 IX86_BUILTIN_PMOVZXWD256,
28618 IX86_BUILTIN_PMOVZXWQ256,
28619 IX86_BUILTIN_PMOVZXDQ256,
28620 IX86_BUILTIN_PMULDQ256,
28621 IX86_BUILTIN_PMULHRSW256,
28622 IX86_BUILTIN_PMULHUW256,
28623 IX86_BUILTIN_PMULHW256,
28624 IX86_BUILTIN_PMULLW256,
28625 IX86_BUILTIN_PMULLD256,
28626 IX86_BUILTIN_PMULUDQ256,
28627 IX86_BUILTIN_POR256,
28628 IX86_BUILTIN_PSADBW256,
28629 IX86_BUILTIN_PSHUFB256,
28630 IX86_BUILTIN_PSHUFD256,
28631 IX86_BUILTIN_PSHUFHW256,
28632 IX86_BUILTIN_PSHUFLW256,
28633 IX86_BUILTIN_PSIGNB256,
28634 IX86_BUILTIN_PSIGNW256,
28635 IX86_BUILTIN_PSIGND256,
28636 IX86_BUILTIN_PSLLDQI256,
28637 IX86_BUILTIN_PSLLWI256,
28638 IX86_BUILTIN_PSLLW256,
28639 IX86_BUILTIN_PSLLDI256,
28640 IX86_BUILTIN_PSLLD256,
28641 IX86_BUILTIN_PSLLQI256,
28642 IX86_BUILTIN_PSLLQ256,
28643 IX86_BUILTIN_PSRAWI256,
28644 IX86_BUILTIN_PSRAW256,
28645 IX86_BUILTIN_PSRADI256,
28646 IX86_BUILTIN_PSRAD256,
28647 IX86_BUILTIN_PSRLDQI256,
28648 IX86_BUILTIN_PSRLWI256,
28649 IX86_BUILTIN_PSRLW256,
28650 IX86_BUILTIN_PSRLDI256,
28651 IX86_BUILTIN_PSRLD256,
28652 IX86_BUILTIN_PSRLQI256,
28653 IX86_BUILTIN_PSRLQ256,
28654 IX86_BUILTIN_PSUBB256,
28655 IX86_BUILTIN_PSUBW256,
28656 IX86_BUILTIN_PSUBD256,
28657 IX86_BUILTIN_PSUBQ256,
28658 IX86_BUILTIN_PSUBSB256,
28659 IX86_BUILTIN_PSUBSW256,
28660 IX86_BUILTIN_PSUBUSB256,
28661 IX86_BUILTIN_PSUBUSW256,
28662 IX86_BUILTIN_PUNPCKHBW256,
28663 IX86_BUILTIN_PUNPCKHWD256,
28664 IX86_BUILTIN_PUNPCKHDQ256,
28665 IX86_BUILTIN_PUNPCKHQDQ256,
28666 IX86_BUILTIN_PUNPCKLBW256,
28667 IX86_BUILTIN_PUNPCKLWD256,
28668 IX86_BUILTIN_PUNPCKLDQ256,
28669 IX86_BUILTIN_PUNPCKLQDQ256,
28670 IX86_BUILTIN_PXOR256,
28671 IX86_BUILTIN_MOVNTDQA256,
28672 IX86_BUILTIN_VBROADCASTSS_PS,
28673 IX86_BUILTIN_VBROADCASTSS_PS256,
28674 IX86_BUILTIN_VBROADCASTSD_PD256,
28675 IX86_BUILTIN_VBROADCASTSI256,
28676 IX86_BUILTIN_PBLENDD256,
28677 IX86_BUILTIN_PBLENDD128,
28678 IX86_BUILTIN_PBROADCASTB256,
28679 IX86_BUILTIN_PBROADCASTW256,
28680 IX86_BUILTIN_PBROADCASTD256,
28681 IX86_BUILTIN_PBROADCASTQ256,
28682 IX86_BUILTIN_PBROADCASTB128,
28683 IX86_BUILTIN_PBROADCASTW128,
28684 IX86_BUILTIN_PBROADCASTD128,
28685 IX86_BUILTIN_PBROADCASTQ128,
28686 IX86_BUILTIN_VPERMVARSI256,
28687 IX86_BUILTIN_VPERMDF256,
28688 IX86_BUILTIN_VPERMVARSF256,
28689 IX86_BUILTIN_VPERMDI256,
28690 IX86_BUILTIN_VPERMTI256,
28691 IX86_BUILTIN_VEXTRACT128I256,
28692 IX86_BUILTIN_VINSERT128I256,
28693 IX86_BUILTIN_MASKLOADD,
28694 IX86_BUILTIN_MASKLOADQ,
28695 IX86_BUILTIN_MASKLOADD256,
28696 IX86_BUILTIN_MASKLOADQ256,
28697 IX86_BUILTIN_MASKSTORED,
28698 IX86_BUILTIN_MASKSTOREQ,
28699 IX86_BUILTIN_MASKSTORED256,
28700 IX86_BUILTIN_MASKSTOREQ256,
28701 IX86_BUILTIN_PSLLVV4DI,
28702 IX86_BUILTIN_PSLLVV2DI,
28703 IX86_BUILTIN_PSLLVV8SI,
28704 IX86_BUILTIN_PSLLVV4SI,
28705 IX86_BUILTIN_PSRAVV8SI,
28706 IX86_BUILTIN_PSRAVV4SI,
28707 IX86_BUILTIN_PSRLVV4DI,
28708 IX86_BUILTIN_PSRLVV2DI,
28709 IX86_BUILTIN_PSRLVV8SI,
28710 IX86_BUILTIN_PSRLVV4SI,
28712 IX86_BUILTIN_GATHERSIV2DF,
28713 IX86_BUILTIN_GATHERSIV4DF,
28714 IX86_BUILTIN_GATHERDIV2DF,
28715 IX86_BUILTIN_GATHERDIV4DF,
28716 IX86_BUILTIN_GATHERSIV4SF,
28717 IX86_BUILTIN_GATHERSIV8SF,
28718 IX86_BUILTIN_GATHERDIV4SF,
28719 IX86_BUILTIN_GATHERDIV8SF,
28720 IX86_BUILTIN_GATHERSIV2DI,
28721 IX86_BUILTIN_GATHERSIV4DI,
28722 IX86_BUILTIN_GATHERDIV2DI,
28723 IX86_BUILTIN_GATHERDIV4DI,
28724 IX86_BUILTIN_GATHERSIV4SI,
28725 IX86_BUILTIN_GATHERSIV8SI,
28726 IX86_BUILTIN_GATHERDIV4SI,
28727 IX86_BUILTIN_GATHERDIV8SI,
28730 IX86_BUILTIN_SI512_SI256,
28731 IX86_BUILTIN_PD512_PD256,
28732 IX86_BUILTIN_PS512_PS256,
28733 IX86_BUILTIN_SI512_SI,
28734 IX86_BUILTIN_PD512_PD,
28735 IX86_BUILTIN_PS512_PS,
28736 IX86_BUILTIN_ADDPD512,
28737 IX86_BUILTIN_ADDPS512,
28738 IX86_BUILTIN_ADDSD_ROUND,
28739 IX86_BUILTIN_ADDSS_ROUND,
28740 IX86_BUILTIN_ALIGND512,
28741 IX86_BUILTIN_ALIGNQ512,
28742 IX86_BUILTIN_BLENDMD512,
28743 IX86_BUILTIN_BLENDMPD512,
28744 IX86_BUILTIN_BLENDMPS512,
28745 IX86_BUILTIN_BLENDMQ512,
28746 IX86_BUILTIN_BROADCASTF32X4_512,
28747 IX86_BUILTIN_BROADCASTF64X4_512,
28748 IX86_BUILTIN_BROADCASTI32X4_512,
28749 IX86_BUILTIN_BROADCASTI64X4_512,
28750 IX86_BUILTIN_BROADCASTSD512,
28751 IX86_BUILTIN_BROADCASTSS512,
28752 IX86_BUILTIN_CMPD512,
28753 IX86_BUILTIN_CMPPD512,
28754 IX86_BUILTIN_CMPPS512,
28755 IX86_BUILTIN_CMPQ512,
28756 IX86_BUILTIN_CMPSD_MASK,
28757 IX86_BUILTIN_CMPSS_MASK,
28758 IX86_BUILTIN_COMIDF,
28759 IX86_BUILTIN_COMISF,
28760 IX86_BUILTIN_COMPRESSPD512,
28761 IX86_BUILTIN_COMPRESSPDSTORE512,
28762 IX86_BUILTIN_COMPRESSPS512,
28763 IX86_BUILTIN_COMPRESSPSSTORE512,
28764 IX86_BUILTIN_CVTDQ2PD512,
28765 IX86_BUILTIN_CVTDQ2PS512,
28766 IX86_BUILTIN_CVTPD2DQ512,
28767 IX86_BUILTIN_CVTPD2PS512,
28768 IX86_BUILTIN_CVTPD2UDQ512,
28769 IX86_BUILTIN_CVTPH2PS512,
28770 IX86_BUILTIN_CVTPS2DQ512,
28771 IX86_BUILTIN_CVTPS2PD512,
28772 IX86_BUILTIN_CVTPS2PH512,
28773 IX86_BUILTIN_CVTPS2UDQ512,
28774 IX86_BUILTIN_CVTSD2SS_ROUND,
28775 IX86_BUILTIN_CVTSI2SD64,
28776 IX86_BUILTIN_CVTSI2SS32,
28777 IX86_BUILTIN_CVTSI2SS64,
28778 IX86_BUILTIN_CVTSS2SD_ROUND,
28779 IX86_BUILTIN_CVTTPD2DQ512,
28780 IX86_BUILTIN_CVTTPD2UDQ512,
28781 IX86_BUILTIN_CVTTPS2DQ512,
28782 IX86_BUILTIN_CVTTPS2UDQ512,
28783 IX86_BUILTIN_CVTUDQ2PD512,
28784 IX86_BUILTIN_CVTUDQ2PS512,
28785 IX86_BUILTIN_CVTUSI2SD32,
28786 IX86_BUILTIN_CVTUSI2SD64,
28787 IX86_BUILTIN_CVTUSI2SS32,
28788 IX86_BUILTIN_CVTUSI2SS64,
28789 IX86_BUILTIN_DIVPD512,
28790 IX86_BUILTIN_DIVPS512,
28791 IX86_BUILTIN_DIVSD_ROUND,
28792 IX86_BUILTIN_DIVSS_ROUND,
28793 IX86_BUILTIN_EXPANDPD512,
28794 IX86_BUILTIN_EXPANDPD512Z,
28795 IX86_BUILTIN_EXPANDPDLOAD512,
28796 IX86_BUILTIN_EXPANDPDLOAD512Z,
28797 IX86_BUILTIN_EXPANDPS512,
28798 IX86_BUILTIN_EXPANDPS512Z,
28799 IX86_BUILTIN_EXPANDPSLOAD512,
28800 IX86_BUILTIN_EXPANDPSLOAD512Z,
28801 IX86_BUILTIN_EXTRACTF32X4,
28802 IX86_BUILTIN_EXTRACTF64X4,
28803 IX86_BUILTIN_EXTRACTI32X4,
28804 IX86_BUILTIN_EXTRACTI64X4,
28805 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28806 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28807 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28808 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28809 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28810 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28811 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28812 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28813 IX86_BUILTIN_GETEXPPD512,
28814 IX86_BUILTIN_GETEXPPS512,
28815 IX86_BUILTIN_GETEXPSD128,
28816 IX86_BUILTIN_GETEXPSS128,
28817 IX86_BUILTIN_GETMANTPD512,
28818 IX86_BUILTIN_GETMANTPS512,
28819 IX86_BUILTIN_GETMANTSD128,
28820 IX86_BUILTIN_GETMANTSS128,
28821 IX86_BUILTIN_INSERTF32X4,
28822 IX86_BUILTIN_INSERTF64X4,
28823 IX86_BUILTIN_INSERTI32X4,
28824 IX86_BUILTIN_INSERTI64X4,
28825 IX86_BUILTIN_LOADAPD512,
28826 IX86_BUILTIN_LOADAPS512,
28827 IX86_BUILTIN_LOADDQUDI512,
28828 IX86_BUILTIN_LOADDQUSI512,
28829 IX86_BUILTIN_LOADUPD512,
28830 IX86_BUILTIN_LOADUPS512,
28831 IX86_BUILTIN_MAXPD512,
28832 IX86_BUILTIN_MAXPS512,
28833 IX86_BUILTIN_MAXSD_ROUND,
28834 IX86_BUILTIN_MAXSS_ROUND,
28835 IX86_BUILTIN_MINPD512,
28836 IX86_BUILTIN_MINPS512,
28837 IX86_BUILTIN_MINSD_ROUND,
28838 IX86_BUILTIN_MINSS_ROUND,
28839 IX86_BUILTIN_MOVAPD512,
28840 IX86_BUILTIN_MOVAPS512,
28841 IX86_BUILTIN_MOVDDUP512,
28842 IX86_BUILTIN_MOVDQA32LOAD512,
28843 IX86_BUILTIN_MOVDQA32STORE512,
28844 IX86_BUILTIN_MOVDQA32_512,
28845 IX86_BUILTIN_MOVDQA64LOAD512,
28846 IX86_BUILTIN_MOVDQA64STORE512,
28847 IX86_BUILTIN_MOVDQA64_512,
28848 IX86_BUILTIN_MOVNTDQ512,
28849 IX86_BUILTIN_MOVNTDQA512,
28850 IX86_BUILTIN_MOVNTPD512,
28851 IX86_BUILTIN_MOVNTPS512,
28852 IX86_BUILTIN_MOVSHDUP512,
28853 IX86_BUILTIN_MOVSLDUP512,
28854 IX86_BUILTIN_MULPD512,
28855 IX86_BUILTIN_MULPS512,
28856 IX86_BUILTIN_MULSD_ROUND,
28857 IX86_BUILTIN_MULSS_ROUND,
28858 IX86_BUILTIN_PABSD512,
28859 IX86_BUILTIN_PABSQ512,
28860 IX86_BUILTIN_PADDD512,
28861 IX86_BUILTIN_PADDQ512,
28862 IX86_BUILTIN_PANDD512,
28863 IX86_BUILTIN_PANDND512,
28864 IX86_BUILTIN_PANDNQ512,
28865 IX86_BUILTIN_PANDQ512,
28866 IX86_BUILTIN_PBROADCASTD512,
28867 IX86_BUILTIN_PBROADCASTD512_GPR,
28868 IX86_BUILTIN_PBROADCASTMB512,
28869 IX86_BUILTIN_PBROADCASTMW512,
28870 IX86_BUILTIN_PBROADCASTQ512,
28871 IX86_BUILTIN_PBROADCASTQ512_GPR,
28872 IX86_BUILTIN_PCMPEQD512_MASK,
28873 IX86_BUILTIN_PCMPEQQ512_MASK,
28874 IX86_BUILTIN_PCMPGTD512_MASK,
28875 IX86_BUILTIN_PCMPGTQ512_MASK,
28876 IX86_BUILTIN_PCOMPRESSD512,
28877 IX86_BUILTIN_PCOMPRESSDSTORE512,
28878 IX86_BUILTIN_PCOMPRESSQ512,
28879 IX86_BUILTIN_PCOMPRESSQSTORE512,
28880 IX86_BUILTIN_PEXPANDD512,
28881 IX86_BUILTIN_PEXPANDD512Z,
28882 IX86_BUILTIN_PEXPANDDLOAD512,
28883 IX86_BUILTIN_PEXPANDDLOAD512Z,
28884 IX86_BUILTIN_PEXPANDQ512,
28885 IX86_BUILTIN_PEXPANDQ512Z,
28886 IX86_BUILTIN_PEXPANDQLOAD512,
28887 IX86_BUILTIN_PEXPANDQLOAD512Z,
28888 IX86_BUILTIN_PMAXSD512,
28889 IX86_BUILTIN_PMAXSQ512,
28890 IX86_BUILTIN_PMAXUD512,
28891 IX86_BUILTIN_PMAXUQ512,
28892 IX86_BUILTIN_PMINSD512,
28893 IX86_BUILTIN_PMINSQ512,
28894 IX86_BUILTIN_PMINUD512,
28895 IX86_BUILTIN_PMINUQ512,
28896 IX86_BUILTIN_PMOVDB512,
28897 IX86_BUILTIN_PMOVDB512_MEM,
28898 IX86_BUILTIN_PMOVDW512,
28899 IX86_BUILTIN_PMOVDW512_MEM,
28900 IX86_BUILTIN_PMOVQB512,
28901 IX86_BUILTIN_PMOVQB512_MEM,
28902 IX86_BUILTIN_PMOVQD512,
28903 IX86_BUILTIN_PMOVQD512_MEM,
28904 IX86_BUILTIN_PMOVQW512,
28905 IX86_BUILTIN_PMOVQW512_MEM,
28906 IX86_BUILTIN_PMOVSDB512,
28907 IX86_BUILTIN_PMOVSDB512_MEM,
28908 IX86_BUILTIN_PMOVSDW512,
28909 IX86_BUILTIN_PMOVSDW512_MEM,
28910 IX86_BUILTIN_PMOVSQB512,
28911 IX86_BUILTIN_PMOVSQB512_MEM,
28912 IX86_BUILTIN_PMOVSQD512,
28913 IX86_BUILTIN_PMOVSQD512_MEM,
28914 IX86_BUILTIN_PMOVSQW512,
28915 IX86_BUILTIN_PMOVSQW512_MEM,
28916 IX86_BUILTIN_PMOVSXBD512,
28917 IX86_BUILTIN_PMOVSXBQ512,
28918 IX86_BUILTIN_PMOVSXDQ512,
28919 IX86_BUILTIN_PMOVSXWD512,
28920 IX86_BUILTIN_PMOVSXWQ512,
28921 IX86_BUILTIN_PMOVUSDB512,
28922 IX86_BUILTIN_PMOVUSDB512_MEM,
28923 IX86_BUILTIN_PMOVUSDW512,
28924 IX86_BUILTIN_PMOVUSDW512_MEM,
28925 IX86_BUILTIN_PMOVUSQB512,
28926 IX86_BUILTIN_PMOVUSQB512_MEM,
28927 IX86_BUILTIN_PMOVUSQD512,
28928 IX86_BUILTIN_PMOVUSQD512_MEM,
28929 IX86_BUILTIN_PMOVUSQW512,
28930 IX86_BUILTIN_PMOVUSQW512_MEM,
28931 IX86_BUILTIN_PMOVZXBD512,
28932 IX86_BUILTIN_PMOVZXBQ512,
28933 IX86_BUILTIN_PMOVZXDQ512,
28934 IX86_BUILTIN_PMOVZXWD512,
28935 IX86_BUILTIN_PMOVZXWQ512,
28936 IX86_BUILTIN_PMULDQ512,
28937 IX86_BUILTIN_PMULLD512,
28938 IX86_BUILTIN_PMULUDQ512,
28939 IX86_BUILTIN_PORD512,
28940 IX86_BUILTIN_PORQ512,
28941 IX86_BUILTIN_PROLD512,
28942 IX86_BUILTIN_PROLQ512,
28943 IX86_BUILTIN_PROLVD512,
28944 IX86_BUILTIN_PROLVQ512,
28945 IX86_BUILTIN_PRORD512,
28946 IX86_BUILTIN_PRORQ512,
28947 IX86_BUILTIN_PRORVD512,
28948 IX86_BUILTIN_PRORVQ512,
28949 IX86_BUILTIN_PSHUFD512,
28950 IX86_BUILTIN_PSLLD512,
28951 IX86_BUILTIN_PSLLDI512,
28952 IX86_BUILTIN_PSLLQ512,
28953 IX86_BUILTIN_PSLLQI512,
28954 IX86_BUILTIN_PSLLVV16SI,
28955 IX86_BUILTIN_PSLLVV8DI,
28956 IX86_BUILTIN_PSRAD512,
28957 IX86_BUILTIN_PSRADI512,
28958 IX86_BUILTIN_PSRAQ512,
28959 IX86_BUILTIN_PSRAQI512,
28960 IX86_BUILTIN_PSRAVV16SI,
28961 IX86_BUILTIN_PSRAVV8DI,
28962 IX86_BUILTIN_PSRLD512,
28963 IX86_BUILTIN_PSRLDI512,
28964 IX86_BUILTIN_PSRLQ512,
28965 IX86_BUILTIN_PSRLQI512,
28966 IX86_BUILTIN_PSRLVV16SI,
28967 IX86_BUILTIN_PSRLVV8DI,
28968 IX86_BUILTIN_PSUBD512,
28969 IX86_BUILTIN_PSUBQ512,
28970 IX86_BUILTIN_PTESTMD512,
28971 IX86_BUILTIN_PTESTMQ512,
28972 IX86_BUILTIN_PTESTNMD512,
28973 IX86_BUILTIN_PTESTNMQ512,
28974 IX86_BUILTIN_PUNPCKHDQ512,
28975 IX86_BUILTIN_PUNPCKHQDQ512,
28976 IX86_BUILTIN_PUNPCKLDQ512,
28977 IX86_BUILTIN_PUNPCKLQDQ512,
28978 IX86_BUILTIN_PXORD512,
28979 IX86_BUILTIN_PXORQ512,
28980 IX86_BUILTIN_RCP14PD512,
28981 IX86_BUILTIN_RCP14PS512,
28982 IX86_BUILTIN_RCP14SD,
28983 IX86_BUILTIN_RCP14SS,
28984 IX86_BUILTIN_RNDSCALEPD,
28985 IX86_BUILTIN_RNDSCALEPS,
28986 IX86_BUILTIN_RNDSCALESD,
28987 IX86_BUILTIN_RNDSCALESS,
28988 IX86_BUILTIN_RSQRT14PD512,
28989 IX86_BUILTIN_RSQRT14PS512,
28990 IX86_BUILTIN_RSQRT14SD,
28991 IX86_BUILTIN_RSQRT14SS,
28992 IX86_BUILTIN_SCALEFPD512,
28993 IX86_BUILTIN_SCALEFPS512,
28994 IX86_BUILTIN_SCALEFSD,
28995 IX86_BUILTIN_SCALEFSS,
28996 IX86_BUILTIN_SHUFPD512,
28997 IX86_BUILTIN_SHUFPS512,
28998 IX86_BUILTIN_SHUF_F32x4,
28999 IX86_BUILTIN_SHUF_F64x2,
29000 IX86_BUILTIN_SHUF_I32x4,
29001 IX86_BUILTIN_SHUF_I64x2,
29002 IX86_BUILTIN_SQRTPD512,
29003 IX86_BUILTIN_SQRTPD512_MASK,
29004 IX86_BUILTIN_SQRTPS512_MASK,
29005 IX86_BUILTIN_SQRTPS_NR512,
29006 IX86_BUILTIN_SQRTSD_ROUND,
29007 IX86_BUILTIN_SQRTSS_ROUND,
29008 IX86_BUILTIN_STOREAPD512,
29009 IX86_BUILTIN_STOREAPS512,
29010 IX86_BUILTIN_STOREDQUDI512,
29011 IX86_BUILTIN_STOREDQUSI512,
29012 IX86_BUILTIN_STOREUPD512,
29013 IX86_BUILTIN_STOREUPS512,
29014 IX86_BUILTIN_SUBPD512,
29015 IX86_BUILTIN_SUBPS512,
29016 IX86_BUILTIN_SUBSD_ROUND,
29017 IX86_BUILTIN_SUBSS_ROUND,
29018 IX86_BUILTIN_UCMPD512,
29019 IX86_BUILTIN_UCMPQ512,
29020 IX86_BUILTIN_UNPCKHPD512,
29021 IX86_BUILTIN_UNPCKHPS512,
29022 IX86_BUILTIN_UNPCKLPD512,
29023 IX86_BUILTIN_UNPCKLPS512,
29024 IX86_BUILTIN_VCVTSD2SI32,
29025 IX86_BUILTIN_VCVTSD2SI64,
29026 IX86_BUILTIN_VCVTSD2USI32,
29027 IX86_BUILTIN_VCVTSD2USI64,
29028 IX86_BUILTIN_VCVTSS2SI32,
29029 IX86_BUILTIN_VCVTSS2SI64,
29030 IX86_BUILTIN_VCVTSS2USI32,
29031 IX86_BUILTIN_VCVTSS2USI64,
29032 IX86_BUILTIN_VCVTTSD2SI32,
29033 IX86_BUILTIN_VCVTTSD2SI64,
29034 IX86_BUILTIN_VCVTTSD2USI32,
29035 IX86_BUILTIN_VCVTTSD2USI64,
29036 IX86_BUILTIN_VCVTTSS2SI32,
29037 IX86_BUILTIN_VCVTTSS2SI64,
29038 IX86_BUILTIN_VCVTTSS2USI32,
29039 IX86_BUILTIN_VCVTTSS2USI64,
29040 IX86_BUILTIN_VFMADDPD512_MASK,
29041 IX86_BUILTIN_VFMADDPD512_MASK3,
29042 IX86_BUILTIN_VFMADDPD512_MASKZ,
29043 IX86_BUILTIN_VFMADDPS512_MASK,
29044 IX86_BUILTIN_VFMADDPS512_MASK3,
29045 IX86_BUILTIN_VFMADDPS512_MASKZ,
29046 IX86_BUILTIN_VFMADDSD3_ROUND,
29047 IX86_BUILTIN_VFMADDSS3_ROUND,
29048 IX86_BUILTIN_VFMADDSUBPD512_MASK,
29049 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
29050 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
29051 IX86_BUILTIN_VFMADDSUBPS512_MASK,
29052 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
29053 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
29054 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
29055 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
29056 IX86_BUILTIN_VFMSUBPD512_MASK3,
29057 IX86_BUILTIN_VFMSUBPS512_MASK3,
29058 IX86_BUILTIN_VFMSUBSD3_MASK3,
29059 IX86_BUILTIN_VFMSUBSS3_MASK3,
29060 IX86_BUILTIN_VFNMADDPD512_MASK,
29061 IX86_BUILTIN_VFNMADDPS512_MASK,
29062 IX86_BUILTIN_VFNMSUBPD512_MASK,
29063 IX86_BUILTIN_VFNMSUBPD512_MASK3,
29064 IX86_BUILTIN_VFNMSUBPS512_MASK,
29065 IX86_BUILTIN_VFNMSUBPS512_MASK3,
29066 IX86_BUILTIN_VPCLZCNTD512,
29067 IX86_BUILTIN_VPCLZCNTQ512,
29068 IX86_BUILTIN_VPCONFLICTD512,
29069 IX86_BUILTIN_VPCONFLICTQ512,
29070 IX86_BUILTIN_VPERMDF512,
29071 IX86_BUILTIN_VPERMDI512,
29072 IX86_BUILTIN_VPERMI2VARD512,
29073 IX86_BUILTIN_VPERMI2VARPD512,
29074 IX86_BUILTIN_VPERMI2VARPS512,
29075 IX86_BUILTIN_VPERMI2VARQ512,
29076 IX86_BUILTIN_VPERMILPD512,
29077 IX86_BUILTIN_VPERMILPS512,
29078 IX86_BUILTIN_VPERMILVARPD512,
29079 IX86_BUILTIN_VPERMILVARPS512,
29080 IX86_BUILTIN_VPERMT2VARD512,
29081 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
29082 IX86_BUILTIN_VPERMT2VARPD512,
29083 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
29084 IX86_BUILTIN_VPERMT2VARPS512,
29085 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
29086 IX86_BUILTIN_VPERMT2VARQ512,
29087 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
29088 IX86_BUILTIN_VPERMVARDF512,
29089 IX86_BUILTIN_VPERMVARDI512,
29090 IX86_BUILTIN_VPERMVARSF512,
29091 IX86_BUILTIN_VPERMVARSI512,
29092 IX86_BUILTIN_VTERNLOGD512_MASK,
29093 IX86_BUILTIN_VTERNLOGD512_MASKZ,
29094 IX86_BUILTIN_VTERNLOGQ512_MASK,
29095 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
29097 /* Mask arithmetic operations */
29098 IX86_BUILTIN_KAND16,
29099 IX86_BUILTIN_KANDN16,
29100 IX86_BUILTIN_KNOT16,
29101 IX86_BUILTIN_KOR16,
29102 IX86_BUILTIN_KORTESTC16,
29103 IX86_BUILTIN_KORTESTZ16,
29104 IX86_BUILTIN_KUNPCKBW,
29105 IX86_BUILTIN_KXNOR16,
29106 IX86_BUILTIN_KXOR16,
29107 IX86_BUILTIN_KMOV16,
29110 IX86_BUILTIN_PMOVUSQD256_MEM,
29111 IX86_BUILTIN_PMOVUSQD128_MEM,
29112 IX86_BUILTIN_PMOVSQD256_MEM,
29113 IX86_BUILTIN_PMOVSQD128_MEM,
29114 IX86_BUILTIN_PMOVQD256_MEM,
29115 IX86_BUILTIN_PMOVQD128_MEM,
29116 IX86_BUILTIN_PMOVUSQW256_MEM,
29117 IX86_BUILTIN_PMOVUSQW128_MEM,
29118 IX86_BUILTIN_PMOVSQW256_MEM,
29119 IX86_BUILTIN_PMOVSQW128_MEM,
29120 IX86_BUILTIN_PMOVQW256_MEM,
29121 IX86_BUILTIN_PMOVQW128_MEM,
29122 IX86_BUILTIN_PMOVUSQB256_MEM,
29123 IX86_BUILTIN_PMOVUSQB128_MEM,
29124 IX86_BUILTIN_PMOVSQB256_MEM,
29125 IX86_BUILTIN_PMOVSQB128_MEM,
29126 IX86_BUILTIN_PMOVQB256_MEM,
29127 IX86_BUILTIN_PMOVQB128_MEM,
29128 IX86_BUILTIN_PMOVUSDW256_MEM,
29129 IX86_BUILTIN_PMOVUSDW128_MEM,
29130 IX86_BUILTIN_PMOVSDW256_MEM,
29131 IX86_BUILTIN_PMOVSDW128_MEM,
29132 IX86_BUILTIN_PMOVDW256_MEM,
29133 IX86_BUILTIN_PMOVDW128_MEM,
29134 IX86_BUILTIN_PMOVUSDB256_MEM,
29135 IX86_BUILTIN_PMOVUSDB128_MEM,
29136 IX86_BUILTIN_PMOVSDB256_MEM,
29137 IX86_BUILTIN_PMOVSDB128_MEM,
29138 IX86_BUILTIN_PMOVDB256_MEM,
29139 IX86_BUILTIN_PMOVDB128_MEM,
29140 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29141 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29142 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29143 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29144 IX86_BUILTIN_MOVDQA64STORE256_MASK,
29145 IX86_BUILTIN_MOVDQA64STORE128_MASK,
29146 IX86_BUILTIN_MOVDQA32STORE256_MASK,
29147 IX86_BUILTIN_MOVDQA32STORE128_MASK,
29148 IX86_BUILTIN_LOADAPD256_MASK,
29149 IX86_BUILTIN_LOADAPD128_MASK,
29150 IX86_BUILTIN_LOADAPS256_MASK,
29151 IX86_BUILTIN_LOADAPS128_MASK,
29152 IX86_BUILTIN_STOREAPD256_MASK,
29153 IX86_BUILTIN_STOREAPD128_MASK,
29154 IX86_BUILTIN_STOREAPS256_MASK,
29155 IX86_BUILTIN_STOREAPS128_MASK,
29156 IX86_BUILTIN_LOADUPD256_MASK,
29157 IX86_BUILTIN_LOADUPD128_MASK,
29158 IX86_BUILTIN_LOADUPS256_MASK,
29159 IX86_BUILTIN_LOADUPS128_MASK,
29160 IX86_BUILTIN_STOREUPD256_MASK,
29161 IX86_BUILTIN_STOREUPD128_MASK,
29162 IX86_BUILTIN_STOREUPS256_MASK,
29163 IX86_BUILTIN_STOREUPS128_MASK,
29164 IX86_BUILTIN_LOADDQUDI256_MASK,
29165 IX86_BUILTIN_LOADDQUDI128_MASK,
29166 IX86_BUILTIN_LOADDQUSI256_MASK,
29167 IX86_BUILTIN_LOADDQUSI128_MASK,
29168 IX86_BUILTIN_LOADDQUHI256_MASK,
29169 IX86_BUILTIN_LOADDQUHI128_MASK,
29170 IX86_BUILTIN_LOADDQUQI256_MASK,
29171 IX86_BUILTIN_LOADDQUQI128_MASK,
29172 IX86_BUILTIN_STOREDQUDI256_MASK,
29173 IX86_BUILTIN_STOREDQUDI128_MASK,
29174 IX86_BUILTIN_STOREDQUSI256_MASK,
29175 IX86_BUILTIN_STOREDQUSI128_MASK,
29176 IX86_BUILTIN_STOREDQUHI256_MASK,
29177 IX86_BUILTIN_STOREDQUHI128_MASK,
29178 IX86_BUILTIN_STOREDQUQI256_MASK,
29179 IX86_BUILTIN_STOREDQUQI128_MASK,
29180 IX86_BUILTIN_COMPRESSPDSTORE256,
29181 IX86_BUILTIN_COMPRESSPDSTORE128,
29182 IX86_BUILTIN_COMPRESSPSSTORE256,
29183 IX86_BUILTIN_COMPRESSPSSTORE128,
29184 IX86_BUILTIN_PCOMPRESSQSTORE256,
29185 IX86_BUILTIN_PCOMPRESSQSTORE128,
29186 IX86_BUILTIN_PCOMPRESSDSTORE256,
29187 IX86_BUILTIN_PCOMPRESSDSTORE128,
29188 IX86_BUILTIN_EXPANDPDLOAD256,
29189 IX86_BUILTIN_EXPANDPDLOAD128,
29190 IX86_BUILTIN_EXPANDPSLOAD256,
29191 IX86_BUILTIN_EXPANDPSLOAD128,
29192 IX86_BUILTIN_PEXPANDQLOAD256,
29193 IX86_BUILTIN_PEXPANDQLOAD128,
29194 IX86_BUILTIN_PEXPANDDLOAD256,
29195 IX86_BUILTIN_PEXPANDDLOAD128,
29196 IX86_BUILTIN_EXPANDPDLOAD256Z,
29197 IX86_BUILTIN_EXPANDPDLOAD128Z,
29198 IX86_BUILTIN_EXPANDPSLOAD256Z,
29199 IX86_BUILTIN_EXPANDPSLOAD128Z,
29200 IX86_BUILTIN_PEXPANDQLOAD256Z,
29201 IX86_BUILTIN_PEXPANDQLOAD128Z,
29202 IX86_BUILTIN_PEXPANDDLOAD256Z,
29203 IX86_BUILTIN_PEXPANDDLOAD128Z,
29204 IX86_BUILTIN_PALIGNR256_MASK,
29205 IX86_BUILTIN_PALIGNR128_MASK,
29206 IX86_BUILTIN_MOVDQA64_256_MASK,
29207 IX86_BUILTIN_MOVDQA64_128_MASK,
29208 IX86_BUILTIN_MOVDQA32_256_MASK,
29209 IX86_BUILTIN_MOVDQA32_128_MASK,
29210 IX86_BUILTIN_MOVAPD256_MASK,
29211 IX86_BUILTIN_MOVAPD128_MASK,
29212 IX86_BUILTIN_MOVAPS256_MASK,
29213 IX86_BUILTIN_MOVAPS128_MASK,
29214 IX86_BUILTIN_MOVDQUHI256_MASK,
29215 IX86_BUILTIN_MOVDQUHI128_MASK,
29216 IX86_BUILTIN_MOVDQUQI256_MASK,
29217 IX86_BUILTIN_MOVDQUQI128_MASK,
29218 IX86_BUILTIN_MINPS128_MASK,
29219 IX86_BUILTIN_MAXPS128_MASK,
29220 IX86_BUILTIN_MINPD128_MASK,
29221 IX86_BUILTIN_MAXPD128_MASK,
29222 IX86_BUILTIN_MAXPD256_MASK,
29223 IX86_BUILTIN_MAXPS256_MASK,
29224 IX86_BUILTIN_MINPD256_MASK,
29225 IX86_BUILTIN_MINPS256_MASK,
29226 IX86_BUILTIN_MULPS128_MASK,
29227 IX86_BUILTIN_DIVPS128_MASK,
29228 IX86_BUILTIN_MULPD128_MASK,
29229 IX86_BUILTIN_DIVPD128_MASK,
29230 IX86_BUILTIN_DIVPD256_MASK,
29231 IX86_BUILTIN_DIVPS256_MASK,
29232 IX86_BUILTIN_MULPD256_MASK,
29233 IX86_BUILTIN_MULPS256_MASK,
29234 IX86_BUILTIN_ADDPD128_MASK,
29235 IX86_BUILTIN_ADDPD256_MASK,
29236 IX86_BUILTIN_ADDPS128_MASK,
29237 IX86_BUILTIN_ADDPS256_MASK,
29238 IX86_BUILTIN_SUBPD128_MASK,
29239 IX86_BUILTIN_SUBPD256_MASK,
29240 IX86_BUILTIN_SUBPS128_MASK,
29241 IX86_BUILTIN_SUBPS256_MASK,
29242 IX86_BUILTIN_XORPD256_MASK,
29243 IX86_BUILTIN_XORPD128_MASK,
29244 IX86_BUILTIN_XORPS256_MASK,
29245 IX86_BUILTIN_XORPS128_MASK,
29246 IX86_BUILTIN_ORPD256_MASK,
29247 IX86_BUILTIN_ORPD128_MASK,
29248 IX86_BUILTIN_ORPS256_MASK,
29249 IX86_BUILTIN_ORPS128_MASK,
29250 IX86_BUILTIN_BROADCASTF32x2_256,
29251 IX86_BUILTIN_BROADCASTI32x2_256,
29252 IX86_BUILTIN_BROADCASTI32x2_128,
29253 IX86_BUILTIN_BROADCASTF64X2_256,
29254 IX86_BUILTIN_BROADCASTI64X2_256,
29255 IX86_BUILTIN_BROADCASTF32X4_256,
29256 IX86_BUILTIN_BROADCASTI32X4_256,
29257 IX86_BUILTIN_EXTRACTF32X4_256,
29258 IX86_BUILTIN_EXTRACTI32X4_256,
29259 IX86_BUILTIN_DBPSADBW256,
29260 IX86_BUILTIN_DBPSADBW128,
29261 IX86_BUILTIN_CVTTPD2QQ256,
29262 IX86_BUILTIN_CVTTPD2QQ128,
29263 IX86_BUILTIN_CVTTPD2UQQ256,
29264 IX86_BUILTIN_CVTTPD2UQQ128,
29265 IX86_BUILTIN_CVTPD2QQ256,
29266 IX86_BUILTIN_CVTPD2QQ128,
29267 IX86_BUILTIN_CVTPD2UQQ256,
29268 IX86_BUILTIN_CVTPD2UQQ128,
29269 IX86_BUILTIN_CVTPD2UDQ256_MASK,
29270 IX86_BUILTIN_CVTPD2UDQ128_MASK,
29271 IX86_BUILTIN_CVTTPS2QQ256,
29272 IX86_BUILTIN_CVTTPS2QQ128,
29273 IX86_BUILTIN_CVTTPS2UQQ256,
29274 IX86_BUILTIN_CVTTPS2UQQ128,
29275 IX86_BUILTIN_CVTTPS2DQ256_MASK,
29276 IX86_BUILTIN_CVTTPS2DQ128_MASK,
29277 IX86_BUILTIN_CVTTPS2UDQ256,
29278 IX86_BUILTIN_CVTTPS2UDQ128,
29279 IX86_BUILTIN_CVTTPD2DQ256_MASK,
29280 IX86_BUILTIN_CVTTPD2DQ128_MASK,
29281 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29282 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29283 IX86_BUILTIN_CVTPD2DQ256_MASK,
29284 IX86_BUILTIN_CVTPD2DQ128_MASK,
29285 IX86_BUILTIN_CVTDQ2PD256_MASK,
29286 IX86_BUILTIN_CVTDQ2PD128_MASK,
29287 IX86_BUILTIN_CVTUDQ2PD256_MASK,
29288 IX86_BUILTIN_CVTUDQ2PD128_MASK,
29289 IX86_BUILTIN_CVTDQ2PS256_MASK,
29290 IX86_BUILTIN_CVTDQ2PS128_MASK,
29291 IX86_BUILTIN_CVTUDQ2PS256_MASK,
29292 IX86_BUILTIN_CVTUDQ2PS128_MASK,
29293 IX86_BUILTIN_CVTPS2PD256_MASK,
29294 IX86_BUILTIN_CVTPS2PD128_MASK,
29295 IX86_BUILTIN_PBROADCASTB256_MASK,
29296 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29297 IX86_BUILTIN_PBROADCASTB128_MASK,
29298 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29299 IX86_BUILTIN_PBROADCASTW256_MASK,
29300 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29301 IX86_BUILTIN_PBROADCASTW128_MASK,
29302 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29303 IX86_BUILTIN_PBROADCASTD256_MASK,
29304 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29305 IX86_BUILTIN_PBROADCASTD128_MASK,
29306 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29307 IX86_BUILTIN_PBROADCASTQ256_MASK,
29308 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29309 IX86_BUILTIN_PBROADCASTQ128_MASK,
29310 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29311 IX86_BUILTIN_BROADCASTSS256,
29312 IX86_BUILTIN_BROADCASTSS128,
29313 IX86_BUILTIN_BROADCASTSD256,
29314 IX86_BUILTIN_EXTRACTF64X2_256,
29315 IX86_BUILTIN_EXTRACTI64X2_256,
29316 IX86_BUILTIN_INSERTF32X4_256,
29317 IX86_BUILTIN_INSERTI32X4_256,
29318 IX86_BUILTIN_PMOVSXBW256_MASK,
29319 IX86_BUILTIN_PMOVSXBW128_MASK,
29320 IX86_BUILTIN_PMOVSXBD256_MASK,
29321 IX86_BUILTIN_PMOVSXBD128_MASK,
29322 IX86_BUILTIN_PMOVSXBQ256_MASK,
29323 IX86_BUILTIN_PMOVSXBQ128_MASK,
29324 IX86_BUILTIN_PMOVSXWD256_MASK,
29325 IX86_BUILTIN_PMOVSXWD128_MASK,
29326 IX86_BUILTIN_PMOVSXWQ256_MASK,
29327 IX86_BUILTIN_PMOVSXWQ128_MASK,
29328 IX86_BUILTIN_PMOVSXDQ256_MASK,
29329 IX86_BUILTIN_PMOVSXDQ128_MASK,
29330 IX86_BUILTIN_PMOVZXBW256_MASK,
29331 IX86_BUILTIN_PMOVZXBW128_MASK,
29332 IX86_BUILTIN_PMOVZXBD256_MASK,
29333 IX86_BUILTIN_PMOVZXBD128_MASK,
29334 IX86_BUILTIN_PMOVZXBQ256_MASK,
29335 IX86_BUILTIN_PMOVZXBQ128_MASK,
29336 IX86_BUILTIN_PMOVZXWD256_MASK,
29337 IX86_BUILTIN_PMOVZXWD128_MASK,
29338 IX86_BUILTIN_PMOVZXWQ256_MASK,
29339 IX86_BUILTIN_PMOVZXWQ128_MASK,
29340 IX86_BUILTIN_PMOVZXDQ256_MASK,
29341 IX86_BUILTIN_PMOVZXDQ128_MASK,
29342 IX86_BUILTIN_REDUCEPD256_MASK,
29343 IX86_BUILTIN_REDUCEPD128_MASK,
29344 IX86_BUILTIN_REDUCEPS256_MASK,
29345 IX86_BUILTIN_REDUCEPS128_MASK,
29346 IX86_BUILTIN_REDUCESD_MASK,
29347 IX86_BUILTIN_REDUCESS_MASK,
29348 IX86_BUILTIN_VPERMVARHI256_MASK,
29349 IX86_BUILTIN_VPERMVARHI128_MASK,
29350 IX86_BUILTIN_VPERMT2VARHI256,
29351 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29352 IX86_BUILTIN_VPERMT2VARHI128,
29353 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29354 IX86_BUILTIN_VPERMI2VARHI256,
29355 IX86_BUILTIN_VPERMI2VARHI128,
29356 IX86_BUILTIN_RCP14PD256,
29357 IX86_BUILTIN_RCP14PD128,
29358 IX86_BUILTIN_RCP14PS256,
29359 IX86_BUILTIN_RCP14PS128,
29360 IX86_BUILTIN_RSQRT14PD256_MASK,
29361 IX86_BUILTIN_RSQRT14PD128_MASK,
29362 IX86_BUILTIN_RSQRT14PS256_MASK,
29363 IX86_BUILTIN_RSQRT14PS128_MASK,
29364 IX86_BUILTIN_SQRTPD256_MASK,
29365 IX86_BUILTIN_SQRTPD128_MASK,
29366 IX86_BUILTIN_SQRTPS256_MASK,
29367 IX86_BUILTIN_SQRTPS128_MASK,
29368 IX86_BUILTIN_PADDB128_MASK,
29369 IX86_BUILTIN_PADDW128_MASK,
29370 IX86_BUILTIN_PADDD128_MASK,
29371 IX86_BUILTIN_PADDQ128_MASK,
29372 IX86_BUILTIN_PSUBB128_MASK,
29373 IX86_BUILTIN_PSUBW128_MASK,
29374 IX86_BUILTIN_PSUBD128_MASK,
29375 IX86_BUILTIN_PSUBQ128_MASK,
29376 IX86_BUILTIN_PADDSB128_MASK,
29377 IX86_BUILTIN_PADDSW128_MASK,
29378 IX86_BUILTIN_PSUBSB128_MASK,
29379 IX86_BUILTIN_PSUBSW128_MASK,
29380 IX86_BUILTIN_PADDUSB128_MASK,
29381 IX86_BUILTIN_PADDUSW128_MASK,
29382 IX86_BUILTIN_PSUBUSB128_MASK,
29383 IX86_BUILTIN_PSUBUSW128_MASK,
29384 IX86_BUILTIN_PADDB256_MASK,
29385 IX86_BUILTIN_PADDW256_MASK,
29386 IX86_BUILTIN_PADDD256_MASK,
29387 IX86_BUILTIN_PADDQ256_MASK,
29388 IX86_BUILTIN_PADDSB256_MASK,
29389 IX86_BUILTIN_PADDSW256_MASK,
29390 IX86_BUILTIN_PADDUSB256_MASK,
29391 IX86_BUILTIN_PADDUSW256_MASK,
29392 IX86_BUILTIN_PSUBB256_MASK,
29393 IX86_BUILTIN_PSUBW256_MASK,
29394 IX86_BUILTIN_PSUBD256_MASK,
29395 IX86_BUILTIN_PSUBQ256_MASK,
29396 IX86_BUILTIN_PSUBSB256_MASK,
29397 IX86_BUILTIN_PSUBSW256_MASK,
29398 IX86_BUILTIN_PSUBUSB256_MASK,
29399 IX86_BUILTIN_PSUBUSW256_MASK,
29400 IX86_BUILTIN_SHUF_F64x2_256,
29401 IX86_BUILTIN_SHUF_I64x2_256,
29402 IX86_BUILTIN_SHUF_I32x4_256,
29403 IX86_BUILTIN_SHUF_F32x4_256,
29404 IX86_BUILTIN_PMOVWB128,
29405 IX86_BUILTIN_PMOVWB256,
29406 IX86_BUILTIN_PMOVSWB128,
29407 IX86_BUILTIN_PMOVSWB256,
29408 IX86_BUILTIN_PMOVUSWB128,
29409 IX86_BUILTIN_PMOVUSWB256,
29410 IX86_BUILTIN_PMOVDB128,
29411 IX86_BUILTIN_PMOVDB256,
29412 IX86_BUILTIN_PMOVSDB128,
29413 IX86_BUILTIN_PMOVSDB256,
29414 IX86_BUILTIN_PMOVUSDB128,
29415 IX86_BUILTIN_PMOVUSDB256,
29416 IX86_BUILTIN_PMOVDW128,
29417 IX86_BUILTIN_PMOVDW256,
29418 IX86_BUILTIN_PMOVSDW128,
29419 IX86_BUILTIN_PMOVSDW256,
29420 IX86_BUILTIN_PMOVUSDW128,
29421 IX86_BUILTIN_PMOVUSDW256,
29422 IX86_BUILTIN_PMOVQB128,
29423 IX86_BUILTIN_PMOVQB256,
29424 IX86_BUILTIN_PMOVSQB128,
29425 IX86_BUILTIN_PMOVSQB256,
29426 IX86_BUILTIN_PMOVUSQB128,
29427 IX86_BUILTIN_PMOVUSQB256,
29428 IX86_BUILTIN_PMOVQW128,
29429 IX86_BUILTIN_PMOVQW256,
29430 IX86_BUILTIN_PMOVSQW128,
29431 IX86_BUILTIN_PMOVSQW256,
29432 IX86_BUILTIN_PMOVUSQW128,
29433 IX86_BUILTIN_PMOVUSQW256,
29434 IX86_BUILTIN_PMOVQD128,
29435 IX86_BUILTIN_PMOVQD256,
29436 IX86_BUILTIN_PMOVSQD128,
29437 IX86_BUILTIN_PMOVSQD256,
29438 IX86_BUILTIN_PMOVUSQD128,
29439 IX86_BUILTIN_PMOVUSQD256,
29440 IX86_BUILTIN_RANGEPD256,
29441 IX86_BUILTIN_RANGEPD128,
29442 IX86_BUILTIN_RANGEPS256,
29443 IX86_BUILTIN_RANGEPS128,
29444 IX86_BUILTIN_GETEXPPS256,
29445 IX86_BUILTIN_GETEXPPD256,
29446 IX86_BUILTIN_GETEXPPS128,
29447 IX86_BUILTIN_GETEXPPD128,
29448 IX86_BUILTIN_FIXUPIMMPD256_MASK,
29449 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29450 IX86_BUILTIN_FIXUPIMMPS256_MASK,
29451 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29452 IX86_BUILTIN_FIXUPIMMPD128_MASK,
29453 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29454 IX86_BUILTIN_FIXUPIMMPS128_MASK,
29455 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29456 IX86_BUILTIN_PABSQ256,
29457 IX86_BUILTIN_PABSQ128,
29458 IX86_BUILTIN_PABSD256_MASK,
29459 IX86_BUILTIN_PABSD128_MASK,
29460 IX86_BUILTIN_PMULHRSW256_MASK,
29461 IX86_BUILTIN_PMULHRSW128_MASK,
29462 IX86_BUILTIN_PMULHUW128_MASK,
29463 IX86_BUILTIN_PMULHUW256_MASK,
29464 IX86_BUILTIN_PMULHW256_MASK,
29465 IX86_BUILTIN_PMULHW128_MASK,
29466 IX86_BUILTIN_PMULLW256_MASK,
29467 IX86_BUILTIN_PMULLW128_MASK,
29468 IX86_BUILTIN_PMULLQ256,
29469 IX86_BUILTIN_PMULLQ128,
29470 IX86_BUILTIN_ANDPD256_MASK,
29471 IX86_BUILTIN_ANDPD128_MASK,
29472 IX86_BUILTIN_ANDPS256_MASK,
29473 IX86_BUILTIN_ANDPS128_MASK,
29474 IX86_BUILTIN_ANDNPD256_MASK,
29475 IX86_BUILTIN_ANDNPD128_MASK,
29476 IX86_BUILTIN_ANDNPS256_MASK,
29477 IX86_BUILTIN_ANDNPS128_MASK,
29478 IX86_BUILTIN_PSLLWI128_MASK,
29479 IX86_BUILTIN_PSLLDI128_MASK,
29480 IX86_BUILTIN_PSLLQI128_MASK,
29481 IX86_BUILTIN_PSLLW128_MASK,
29482 IX86_BUILTIN_PSLLD128_MASK,
29483 IX86_BUILTIN_PSLLQ128_MASK,
29484 IX86_BUILTIN_PSLLWI256_MASK ,
29485 IX86_BUILTIN_PSLLW256_MASK,
29486 IX86_BUILTIN_PSLLDI256_MASK,
29487 IX86_BUILTIN_PSLLD256_MASK,
29488 IX86_BUILTIN_PSLLQI256_MASK,
29489 IX86_BUILTIN_PSLLQ256_MASK,
29490 IX86_BUILTIN_PSRADI128_MASK,
29491 IX86_BUILTIN_PSRAD128_MASK,
29492 IX86_BUILTIN_PSRADI256_MASK,
29493 IX86_BUILTIN_PSRAD256_MASK,
29494 IX86_BUILTIN_PSRAQI128_MASK,
29495 IX86_BUILTIN_PSRAQ128_MASK,
29496 IX86_BUILTIN_PSRAQI256_MASK,
29497 IX86_BUILTIN_PSRAQ256_MASK,
29498 IX86_BUILTIN_PANDD256,
29499 IX86_BUILTIN_PANDD128,
29500 IX86_BUILTIN_PSRLDI128_MASK,
29501 IX86_BUILTIN_PSRLD128_MASK,
29502 IX86_BUILTIN_PSRLDI256_MASK,
29503 IX86_BUILTIN_PSRLD256_MASK,
29504 IX86_BUILTIN_PSRLQI128_MASK,
29505 IX86_BUILTIN_PSRLQ128_MASK,
29506 IX86_BUILTIN_PSRLQI256_MASK,
29507 IX86_BUILTIN_PSRLQ256_MASK,
29508 IX86_BUILTIN_PANDQ256,
29509 IX86_BUILTIN_PANDQ128,
29510 IX86_BUILTIN_PANDND256,
29511 IX86_BUILTIN_PANDND128,
29512 IX86_BUILTIN_PANDNQ256,
29513 IX86_BUILTIN_PANDNQ128,
29514 IX86_BUILTIN_PORD256,
29515 IX86_BUILTIN_PORD128,
29516 IX86_BUILTIN_PORQ256,
29517 IX86_BUILTIN_PORQ128,
29518 IX86_BUILTIN_PXORD256,
29519 IX86_BUILTIN_PXORD128,
29520 IX86_BUILTIN_PXORQ256,
29521 IX86_BUILTIN_PXORQ128,
29522 IX86_BUILTIN_PACKSSWB256_MASK,
29523 IX86_BUILTIN_PACKSSWB128_MASK,
29524 IX86_BUILTIN_PACKUSWB256_MASK,
29525 IX86_BUILTIN_PACKUSWB128_MASK,
29526 IX86_BUILTIN_RNDSCALEPS256,
29527 IX86_BUILTIN_RNDSCALEPD256,
29528 IX86_BUILTIN_RNDSCALEPS128,
29529 IX86_BUILTIN_RNDSCALEPD128,
29530 IX86_BUILTIN_VTERNLOGQ256_MASK,
29531 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29532 IX86_BUILTIN_VTERNLOGD256_MASK,
29533 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29534 IX86_BUILTIN_VTERNLOGQ128_MASK,
29535 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29536 IX86_BUILTIN_VTERNLOGD128_MASK,
29537 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29538 IX86_BUILTIN_SCALEFPD256,
29539 IX86_BUILTIN_SCALEFPS256,
29540 IX86_BUILTIN_SCALEFPD128,
29541 IX86_BUILTIN_SCALEFPS128,
29542 IX86_BUILTIN_VFMADDPD256_MASK,
29543 IX86_BUILTIN_VFMADDPD256_MASK3,
29544 IX86_BUILTIN_VFMADDPD256_MASKZ,
29545 IX86_BUILTIN_VFMADDPD128_MASK,
29546 IX86_BUILTIN_VFMADDPD128_MASK3,
29547 IX86_BUILTIN_VFMADDPD128_MASKZ,
29548 IX86_BUILTIN_VFMADDPS256_MASK,
29549 IX86_BUILTIN_VFMADDPS256_MASK3,
29550 IX86_BUILTIN_VFMADDPS256_MASKZ,
29551 IX86_BUILTIN_VFMADDPS128_MASK,
29552 IX86_BUILTIN_VFMADDPS128_MASK3,
29553 IX86_BUILTIN_VFMADDPS128_MASKZ,
29554 IX86_BUILTIN_VFMSUBPD256_MASK3,
29555 IX86_BUILTIN_VFMSUBPD128_MASK3,
29556 IX86_BUILTIN_VFMSUBPS256_MASK3,
29557 IX86_BUILTIN_VFMSUBPS128_MASK3,
29558 IX86_BUILTIN_VFNMADDPD256_MASK,
29559 IX86_BUILTIN_VFNMADDPD128_MASK,
29560 IX86_BUILTIN_VFNMADDPS256_MASK,
29561 IX86_BUILTIN_VFNMADDPS128_MASK,
29562 IX86_BUILTIN_VFNMSUBPD256_MASK,
29563 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29564 IX86_BUILTIN_VFNMSUBPD128_MASK,
29565 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29566 IX86_BUILTIN_VFNMSUBPS256_MASK,
29567 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29568 IX86_BUILTIN_VFNMSUBPS128_MASK,
29569 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29570 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29571 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29572 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29573 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29574 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29575 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29576 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29577 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29578 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29579 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29580 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29581 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29582 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29583 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29584 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29585 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29586 IX86_BUILTIN_INSERTF64X2_256,
29587 IX86_BUILTIN_INSERTI64X2_256,
29588 IX86_BUILTIN_PSRAVV16HI,
29589 IX86_BUILTIN_PSRAVV8HI,
29590 IX86_BUILTIN_PMADDUBSW256_MASK,
29591 IX86_BUILTIN_PMADDUBSW128_MASK,
29592 IX86_BUILTIN_PMADDWD256_MASK,
29593 IX86_BUILTIN_PMADDWD128_MASK,
29594 IX86_BUILTIN_PSRLVV16HI,
29595 IX86_BUILTIN_PSRLVV8HI,
29596 IX86_BUILTIN_CVTPS2DQ256_MASK,
29597 IX86_BUILTIN_CVTPS2DQ128_MASK,
29598 IX86_BUILTIN_CVTPS2UDQ256,
29599 IX86_BUILTIN_CVTPS2UDQ128,
29600 IX86_BUILTIN_CVTPS2QQ256,
29601 IX86_BUILTIN_CVTPS2QQ128,
29602 IX86_BUILTIN_CVTPS2UQQ256,
29603 IX86_BUILTIN_CVTPS2UQQ128,
29604 IX86_BUILTIN_GETMANTPS256,
29605 IX86_BUILTIN_GETMANTPS128,
29606 IX86_BUILTIN_GETMANTPD256,
29607 IX86_BUILTIN_GETMANTPD128,
29608 IX86_BUILTIN_MOVDDUP256_MASK,
29609 IX86_BUILTIN_MOVDDUP128_MASK,
29610 IX86_BUILTIN_MOVSHDUP256_MASK,
29611 IX86_BUILTIN_MOVSHDUP128_MASK,
29612 IX86_BUILTIN_MOVSLDUP256_MASK,
29613 IX86_BUILTIN_MOVSLDUP128_MASK,
29614 IX86_BUILTIN_CVTQQ2PS256,
29615 IX86_BUILTIN_CVTQQ2PS128,
29616 IX86_BUILTIN_CVTUQQ2PS256,
29617 IX86_BUILTIN_CVTUQQ2PS128,
29618 IX86_BUILTIN_CVTQQ2PD256,
29619 IX86_BUILTIN_CVTQQ2PD128,
29620 IX86_BUILTIN_CVTUQQ2PD256,
29621 IX86_BUILTIN_CVTUQQ2PD128,
29622 IX86_BUILTIN_VPERMT2VARQ256,
29623 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29624 IX86_BUILTIN_VPERMT2VARD256,
29625 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29626 IX86_BUILTIN_VPERMI2VARQ256,
29627 IX86_BUILTIN_VPERMI2VARD256,
29628 IX86_BUILTIN_VPERMT2VARPD256,
29629 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29630 IX86_BUILTIN_VPERMT2VARPS256,
29631 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29632 IX86_BUILTIN_VPERMI2VARPD256,
29633 IX86_BUILTIN_VPERMI2VARPS256,
29634 IX86_BUILTIN_VPERMT2VARQ128,
29635 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29636 IX86_BUILTIN_VPERMT2VARD128,
29637 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29638 IX86_BUILTIN_VPERMI2VARQ128,
29639 IX86_BUILTIN_VPERMI2VARD128,
29640 IX86_BUILTIN_VPERMT2VARPD128,
29641 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29642 IX86_BUILTIN_VPERMT2VARPS128,
29643 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29644 IX86_BUILTIN_VPERMI2VARPD128,
29645 IX86_BUILTIN_VPERMI2VARPS128,
29646 IX86_BUILTIN_PSHUFB256_MASK,
29647 IX86_BUILTIN_PSHUFB128_MASK,
29648 IX86_BUILTIN_PSHUFHW256_MASK,
29649 IX86_BUILTIN_PSHUFHW128_MASK,
29650 IX86_BUILTIN_PSHUFLW256_MASK,
29651 IX86_BUILTIN_PSHUFLW128_MASK,
29652 IX86_BUILTIN_PSHUFD256_MASK,
29653 IX86_BUILTIN_PSHUFD128_MASK,
29654 IX86_BUILTIN_SHUFPD256_MASK,
29655 IX86_BUILTIN_SHUFPD128_MASK,
29656 IX86_BUILTIN_SHUFPS256_MASK,
29657 IX86_BUILTIN_SHUFPS128_MASK,
29658 IX86_BUILTIN_PROLVQ256,
29659 IX86_BUILTIN_PROLVQ128,
29660 IX86_BUILTIN_PROLQ256,
29661 IX86_BUILTIN_PROLQ128,
29662 IX86_BUILTIN_PRORVQ256,
29663 IX86_BUILTIN_PRORVQ128,
29664 IX86_BUILTIN_PRORQ256,
29665 IX86_BUILTIN_PRORQ128,
29666 IX86_BUILTIN_PSRAVQ128,
29667 IX86_BUILTIN_PSRAVQ256,
29668 IX86_BUILTIN_PSLLVV4DI_MASK,
29669 IX86_BUILTIN_PSLLVV2DI_MASK,
29670 IX86_BUILTIN_PSLLVV8SI_MASK,
29671 IX86_BUILTIN_PSLLVV4SI_MASK,
29672 IX86_BUILTIN_PSRAVV8SI_MASK,
29673 IX86_BUILTIN_PSRAVV4SI_MASK,
29674 IX86_BUILTIN_PSRLVV4DI_MASK,
29675 IX86_BUILTIN_PSRLVV2DI_MASK,
29676 IX86_BUILTIN_PSRLVV8SI_MASK,
29677 IX86_BUILTIN_PSRLVV4SI_MASK,
29678 IX86_BUILTIN_PSRAWI256_MASK,
29679 IX86_BUILTIN_PSRAW256_MASK,
29680 IX86_BUILTIN_PSRAWI128_MASK,
29681 IX86_BUILTIN_PSRAW128_MASK,
29682 IX86_BUILTIN_PSRLWI256_MASK,
29683 IX86_BUILTIN_PSRLW256_MASK,
29684 IX86_BUILTIN_PSRLWI128_MASK,
29685 IX86_BUILTIN_PSRLW128_MASK,
29686 IX86_BUILTIN_PRORVD256,
29687 IX86_BUILTIN_PROLVD256,
29688 IX86_BUILTIN_PRORD256,
29689 IX86_BUILTIN_PROLD256,
29690 IX86_BUILTIN_PRORVD128,
29691 IX86_BUILTIN_PROLVD128,
29692 IX86_BUILTIN_PRORD128,
29693 IX86_BUILTIN_PROLD128,
29694 IX86_BUILTIN_FPCLASSPD256,
29695 IX86_BUILTIN_FPCLASSPD128,
29696 IX86_BUILTIN_FPCLASSSD,
29697 IX86_BUILTIN_FPCLASSPS256,
29698 IX86_BUILTIN_FPCLASSPS128,
29699 IX86_BUILTIN_FPCLASSSS,
29700 IX86_BUILTIN_CVTB2MASK128,
29701 IX86_BUILTIN_CVTB2MASK256,
29702 IX86_BUILTIN_CVTW2MASK128,
29703 IX86_BUILTIN_CVTW2MASK256,
29704 IX86_BUILTIN_CVTD2MASK128,
29705 IX86_BUILTIN_CVTD2MASK256,
29706 IX86_BUILTIN_CVTQ2MASK128,
29707 IX86_BUILTIN_CVTQ2MASK256,
29708 IX86_BUILTIN_CVTMASK2B128,
29709 IX86_BUILTIN_CVTMASK2B256,
29710 IX86_BUILTIN_CVTMASK2W128,
29711 IX86_BUILTIN_CVTMASK2W256,
29712 IX86_BUILTIN_CVTMASK2D128,
29713 IX86_BUILTIN_CVTMASK2D256,
29714 IX86_BUILTIN_CVTMASK2Q128,
29715 IX86_BUILTIN_CVTMASK2Q256,
29716 IX86_BUILTIN_PCMPEQB128_MASK,
29717 IX86_BUILTIN_PCMPEQB256_MASK,
29718 IX86_BUILTIN_PCMPEQW128_MASK,
29719 IX86_BUILTIN_PCMPEQW256_MASK,
29720 IX86_BUILTIN_PCMPEQD128_MASK,
29721 IX86_BUILTIN_PCMPEQD256_MASK,
29722 IX86_BUILTIN_PCMPEQQ128_MASK,
29723 IX86_BUILTIN_PCMPEQQ256_MASK,
29724 IX86_BUILTIN_PCMPGTB128_MASK,
29725 IX86_BUILTIN_PCMPGTB256_MASK,
29726 IX86_BUILTIN_PCMPGTW128_MASK,
29727 IX86_BUILTIN_PCMPGTW256_MASK,
29728 IX86_BUILTIN_PCMPGTD128_MASK,
29729 IX86_BUILTIN_PCMPGTD256_MASK,
29730 IX86_BUILTIN_PCMPGTQ128_MASK,
29731 IX86_BUILTIN_PCMPGTQ256_MASK,
29732 IX86_BUILTIN_PTESTMB128,
29733 IX86_BUILTIN_PTESTMB256,
29734 IX86_BUILTIN_PTESTMW128,
29735 IX86_BUILTIN_PTESTMW256,
29736 IX86_BUILTIN_PTESTMD128,
29737 IX86_BUILTIN_PTESTMD256,
29738 IX86_BUILTIN_PTESTMQ128,
29739 IX86_BUILTIN_PTESTMQ256,
29740 IX86_BUILTIN_PTESTNMB128,
29741 IX86_BUILTIN_PTESTNMB256,
29742 IX86_BUILTIN_PTESTNMW128,
29743 IX86_BUILTIN_PTESTNMW256,
29744 IX86_BUILTIN_PTESTNMD128,
29745 IX86_BUILTIN_PTESTNMD256,
29746 IX86_BUILTIN_PTESTNMQ128,
29747 IX86_BUILTIN_PTESTNMQ256,
29748 IX86_BUILTIN_PBROADCASTMB128,
29749 IX86_BUILTIN_PBROADCASTMB256,
29750 IX86_BUILTIN_PBROADCASTMW128,
29751 IX86_BUILTIN_PBROADCASTMW256,
29752 IX86_BUILTIN_COMPRESSPD256,
29753 IX86_BUILTIN_COMPRESSPD128,
29754 IX86_BUILTIN_COMPRESSPS256,
29755 IX86_BUILTIN_COMPRESSPS128,
29756 IX86_BUILTIN_PCOMPRESSQ256,
29757 IX86_BUILTIN_PCOMPRESSQ128,
29758 IX86_BUILTIN_PCOMPRESSD256,
29759 IX86_BUILTIN_PCOMPRESSD128,
29760 IX86_BUILTIN_EXPANDPD256,
29761 IX86_BUILTIN_EXPANDPD128,
29762 IX86_BUILTIN_EXPANDPS256,
29763 IX86_BUILTIN_EXPANDPS128,
29764 IX86_BUILTIN_PEXPANDQ256,
29765 IX86_BUILTIN_PEXPANDQ128,
29766 IX86_BUILTIN_PEXPANDD256,
29767 IX86_BUILTIN_PEXPANDD128,
29768 IX86_BUILTIN_EXPANDPD256Z,
29769 IX86_BUILTIN_EXPANDPD128Z,
29770 IX86_BUILTIN_EXPANDPS256Z,
29771 IX86_BUILTIN_EXPANDPS128Z,
29772 IX86_BUILTIN_PEXPANDQ256Z,
29773 IX86_BUILTIN_PEXPANDQ128Z,
29774 IX86_BUILTIN_PEXPANDD256Z,
29775 IX86_BUILTIN_PEXPANDD128Z,
29776 IX86_BUILTIN_PMAXSD256_MASK,
29777 IX86_BUILTIN_PMINSD256_MASK,
29778 IX86_BUILTIN_PMAXUD256_MASK,
29779 IX86_BUILTIN_PMINUD256_MASK,
29780 IX86_BUILTIN_PMAXSD128_MASK,
29781 IX86_BUILTIN_PMINSD128_MASK,
29782 IX86_BUILTIN_PMAXUD128_MASK,
29783 IX86_BUILTIN_PMINUD128_MASK,
29784 IX86_BUILTIN_PMAXSQ256_MASK,
29785 IX86_BUILTIN_PMINSQ256_MASK,
29786 IX86_BUILTIN_PMAXUQ256_MASK,
29787 IX86_BUILTIN_PMINUQ256_MASK,
29788 IX86_BUILTIN_PMAXSQ128_MASK,
29789 IX86_BUILTIN_PMINSQ128_MASK,
29790 IX86_BUILTIN_PMAXUQ128_MASK,
29791 IX86_BUILTIN_PMINUQ128_MASK,
29792 IX86_BUILTIN_PMINSB256_MASK,
29793 IX86_BUILTIN_PMINUB256_MASK,
29794 IX86_BUILTIN_PMAXSB256_MASK,
29795 IX86_BUILTIN_PMAXUB256_MASK,
29796 IX86_BUILTIN_PMINSB128_MASK,
29797 IX86_BUILTIN_PMINUB128_MASK,
29798 IX86_BUILTIN_PMAXSB128_MASK,
29799 IX86_BUILTIN_PMAXUB128_MASK,
29800 IX86_BUILTIN_PMINSW256_MASK,
29801 IX86_BUILTIN_PMINUW256_MASK,
29802 IX86_BUILTIN_PMAXSW256_MASK,
29803 IX86_BUILTIN_PMAXUW256_MASK,
29804 IX86_BUILTIN_PMINSW128_MASK,
29805 IX86_BUILTIN_PMINUW128_MASK,
29806 IX86_BUILTIN_PMAXSW128_MASK,
29807 IX86_BUILTIN_PMAXUW128_MASK,
29808 IX86_BUILTIN_VPCONFLICTQ256,
29809 IX86_BUILTIN_VPCONFLICTD256,
29810 IX86_BUILTIN_VPCLZCNTQ256,
29811 IX86_BUILTIN_VPCLZCNTD256,
29812 IX86_BUILTIN_UNPCKHPD256_MASK,
29813 IX86_BUILTIN_UNPCKHPD128_MASK,
29814 IX86_BUILTIN_UNPCKHPS256_MASK,
29815 IX86_BUILTIN_UNPCKHPS128_MASK,
29816 IX86_BUILTIN_UNPCKLPD256_MASK,
29817 IX86_BUILTIN_UNPCKLPD128_MASK,
29818 IX86_BUILTIN_UNPCKLPS256_MASK,
29819 IX86_BUILTIN_VPCONFLICTQ128,
29820 IX86_BUILTIN_VPCONFLICTD128,
29821 IX86_BUILTIN_VPCLZCNTQ128,
29822 IX86_BUILTIN_VPCLZCNTD128,
29823 IX86_BUILTIN_UNPCKLPS128_MASK,
29824 IX86_BUILTIN_ALIGND256,
29825 IX86_BUILTIN_ALIGNQ256,
29826 IX86_BUILTIN_ALIGND128,
29827 IX86_BUILTIN_ALIGNQ128,
29828 IX86_BUILTIN_CVTPS2PH256_MASK,
29829 IX86_BUILTIN_CVTPS2PH_MASK,
29830 IX86_BUILTIN_CVTPH2PS_MASK,
29831 IX86_BUILTIN_CVTPH2PS256_MASK,
29832 IX86_BUILTIN_PUNPCKHDQ128_MASK,
29833 IX86_BUILTIN_PUNPCKHDQ256_MASK,
29834 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
29835 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
29836 IX86_BUILTIN_PUNPCKLDQ128_MASK,
29837 IX86_BUILTIN_PUNPCKLDQ256_MASK,
29838 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
29839 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
29840 IX86_BUILTIN_PUNPCKHBW128_MASK,
29841 IX86_BUILTIN_PUNPCKHBW256_MASK,
29842 IX86_BUILTIN_PUNPCKHWD128_MASK,
29843 IX86_BUILTIN_PUNPCKHWD256_MASK,
29844 IX86_BUILTIN_PUNPCKLBW128_MASK,
29845 IX86_BUILTIN_PUNPCKLBW256_MASK,
29846 IX86_BUILTIN_PUNPCKLWD128_MASK,
29847 IX86_BUILTIN_PUNPCKLWD256_MASK,
29848 IX86_BUILTIN_PSLLVV16HI,
29849 IX86_BUILTIN_PSLLVV8HI,
29850 IX86_BUILTIN_PACKSSDW256_MASK,
29851 IX86_BUILTIN_PACKSSDW128_MASK,
29852 IX86_BUILTIN_PACKUSDW256_MASK,
29853 IX86_BUILTIN_PACKUSDW128_MASK,
29854 IX86_BUILTIN_PAVGB256_MASK,
29855 IX86_BUILTIN_PAVGW256_MASK,
29856 IX86_BUILTIN_PAVGB128_MASK,
29857 IX86_BUILTIN_PAVGW128_MASK,
29858 IX86_BUILTIN_VPERMVARSF256_MASK,
29859 IX86_BUILTIN_VPERMVARDF256_MASK,
29860 IX86_BUILTIN_VPERMDF256_MASK,
29861 IX86_BUILTIN_PABSB256_MASK,
29862 IX86_BUILTIN_PABSB128_MASK,
29863 IX86_BUILTIN_PABSW256_MASK,
29864 IX86_BUILTIN_PABSW128_MASK,
29865 IX86_BUILTIN_VPERMILVARPD_MASK,
29866 IX86_BUILTIN_VPERMILVARPS_MASK,
29867 IX86_BUILTIN_VPERMILVARPD256_MASK,
29868 IX86_BUILTIN_VPERMILVARPS256_MASK,
29869 IX86_BUILTIN_VPERMILPD_MASK,
29870 IX86_BUILTIN_VPERMILPS_MASK,
29871 IX86_BUILTIN_VPERMILPD256_MASK,
29872 IX86_BUILTIN_VPERMILPS256_MASK,
29873 IX86_BUILTIN_BLENDMQ256,
29874 IX86_BUILTIN_BLENDMD256,
29875 IX86_BUILTIN_BLENDMPD256,
29876 IX86_BUILTIN_BLENDMPS256,
29877 IX86_BUILTIN_BLENDMQ128,
29878 IX86_BUILTIN_BLENDMD128,
29879 IX86_BUILTIN_BLENDMPD128,
29880 IX86_BUILTIN_BLENDMPS128,
29881 IX86_BUILTIN_BLENDMW256,
29882 IX86_BUILTIN_BLENDMB256,
29883 IX86_BUILTIN_BLENDMW128,
29884 IX86_BUILTIN_BLENDMB128,
29885 IX86_BUILTIN_PMULLD256_MASK,
29886 IX86_BUILTIN_PMULLD128_MASK,
29887 IX86_BUILTIN_PMULUDQ256_MASK,
29888 IX86_BUILTIN_PMULDQ256_MASK,
29889 IX86_BUILTIN_PMULDQ128_MASK,
29890 IX86_BUILTIN_PMULUDQ128_MASK,
29891 IX86_BUILTIN_CVTPD2PS256_MASK,
29892 IX86_BUILTIN_CVTPD2PS_MASK,
29893 IX86_BUILTIN_VPERMVARSI256_MASK,
29894 IX86_BUILTIN_VPERMVARDI256_MASK,
29895 IX86_BUILTIN_VPERMDI256_MASK,
29896 IX86_BUILTIN_CMPQ256,
29897 IX86_BUILTIN_CMPD256,
29898 IX86_BUILTIN_UCMPQ256,
29899 IX86_BUILTIN_UCMPD256,
29900 IX86_BUILTIN_CMPB256,
29901 IX86_BUILTIN_CMPW256,
29902 IX86_BUILTIN_UCMPB256,
29903 IX86_BUILTIN_UCMPW256,
29904 IX86_BUILTIN_CMPPD256_MASK,
29905 IX86_BUILTIN_CMPPS256_MASK,
29906 IX86_BUILTIN_CMPQ128,
29907 IX86_BUILTIN_CMPD128,
29908 IX86_BUILTIN_UCMPQ128,
29909 IX86_BUILTIN_UCMPD128,
29910 IX86_BUILTIN_CMPB128,
29911 IX86_BUILTIN_CMPW128,
29912 IX86_BUILTIN_UCMPB128,
29913 IX86_BUILTIN_UCMPW128,
29914 IX86_BUILTIN_CMPPD128_MASK,
29915 IX86_BUILTIN_CMPPS128_MASK,
29917 IX86_BUILTIN_GATHER3SIV8SF,
29918 IX86_BUILTIN_GATHER3SIV4SF,
29919 IX86_BUILTIN_GATHER3SIV4DF,
29920 IX86_BUILTIN_GATHER3SIV2DF,
29921 IX86_BUILTIN_GATHER3DIV8SF,
29922 IX86_BUILTIN_GATHER3DIV4SF,
29923 IX86_BUILTIN_GATHER3DIV4DF,
29924 IX86_BUILTIN_GATHER3DIV2DF,
29925 IX86_BUILTIN_GATHER3SIV8SI,
29926 IX86_BUILTIN_GATHER3SIV4SI,
29927 IX86_BUILTIN_GATHER3SIV4DI,
29928 IX86_BUILTIN_GATHER3SIV2DI,
29929 IX86_BUILTIN_GATHER3DIV8SI,
29930 IX86_BUILTIN_GATHER3DIV4SI,
29931 IX86_BUILTIN_GATHER3DIV4DI,
29932 IX86_BUILTIN_GATHER3DIV2DI,
29933 IX86_BUILTIN_SCATTERSIV8SF,
29934 IX86_BUILTIN_SCATTERSIV4SF,
29935 IX86_BUILTIN_SCATTERSIV4DF,
29936 IX86_BUILTIN_SCATTERSIV2DF,
29937 IX86_BUILTIN_SCATTERDIV8SF,
29938 IX86_BUILTIN_SCATTERDIV4SF,
29939 IX86_BUILTIN_SCATTERDIV4DF,
29940 IX86_BUILTIN_SCATTERDIV2DF,
29941 IX86_BUILTIN_SCATTERSIV8SI,
29942 IX86_BUILTIN_SCATTERSIV4SI,
29943 IX86_BUILTIN_SCATTERSIV4DI,
29944 IX86_BUILTIN_SCATTERSIV2DI,
29945 IX86_BUILTIN_SCATTERDIV8SI,
29946 IX86_BUILTIN_SCATTERDIV4SI,
29947 IX86_BUILTIN_SCATTERDIV4DI,
29948 IX86_BUILTIN_SCATTERDIV2DI,
29951 IX86_BUILTIN_RANGESD128,
29952 IX86_BUILTIN_RANGESS128,
29953 IX86_BUILTIN_KUNPCKWD,
29954 IX86_BUILTIN_KUNPCKDQ,
29955 IX86_BUILTIN_BROADCASTF32x2_512,
29956 IX86_BUILTIN_BROADCASTI32x2_512,
29957 IX86_BUILTIN_BROADCASTF64X2_512,
29958 IX86_BUILTIN_BROADCASTI64X2_512,
29959 IX86_BUILTIN_BROADCASTF32X8_512,
29960 IX86_BUILTIN_BROADCASTI32X8_512,
29961 IX86_BUILTIN_EXTRACTF64X2_512,
29962 IX86_BUILTIN_EXTRACTF32X8,
29963 IX86_BUILTIN_EXTRACTI64X2_512,
29964 IX86_BUILTIN_EXTRACTI32X8,
29965 IX86_BUILTIN_REDUCEPD512_MASK,
29966 IX86_BUILTIN_REDUCEPS512_MASK,
29967 IX86_BUILTIN_PMULLQ512,
29968 IX86_BUILTIN_XORPD512,
29969 IX86_BUILTIN_XORPS512,
29970 IX86_BUILTIN_ORPD512,
29971 IX86_BUILTIN_ORPS512,
29972 IX86_BUILTIN_ANDPD512,
29973 IX86_BUILTIN_ANDPS512,
29974 IX86_BUILTIN_ANDNPD512,
29975 IX86_BUILTIN_ANDNPS512,
29976 IX86_BUILTIN_INSERTF32X8,
29977 IX86_BUILTIN_INSERTI32X8,
29978 IX86_BUILTIN_INSERTF64X2_512,
29979 IX86_BUILTIN_INSERTI64X2_512,
29980 IX86_BUILTIN_FPCLASSPD512,
29981 IX86_BUILTIN_FPCLASSPS512,
29982 IX86_BUILTIN_CVTD2MASK512,
29983 IX86_BUILTIN_CVTQ2MASK512,
29984 IX86_BUILTIN_CVTMASK2D512,
29985 IX86_BUILTIN_CVTMASK2Q512,
29986 IX86_BUILTIN_CVTPD2QQ512,
29987 IX86_BUILTIN_CVTPS2QQ512,
29988 IX86_BUILTIN_CVTPD2UQQ512,
29989 IX86_BUILTIN_CVTPS2UQQ512,
29990 IX86_BUILTIN_CVTQQ2PS512,
29991 IX86_BUILTIN_CVTUQQ2PS512,
29992 IX86_BUILTIN_CVTQQ2PD512,
29993 IX86_BUILTIN_CVTUQQ2PD512,
29994 IX86_BUILTIN_CVTTPS2QQ512,
29995 IX86_BUILTIN_CVTTPS2UQQ512,
29996 IX86_BUILTIN_CVTTPD2QQ512,
29997 IX86_BUILTIN_CVTTPD2UQQ512,
29998 IX86_BUILTIN_RANGEPS512,
29999 IX86_BUILTIN_RANGEPD512,
30002 IX86_BUILTIN_PACKUSDW512,
30003 IX86_BUILTIN_PACKSSDW512,
30004 IX86_BUILTIN_LOADDQUHI512_MASK,
30005 IX86_BUILTIN_LOADDQUQI512_MASK,
30006 IX86_BUILTIN_PSLLDQ512,
30007 IX86_BUILTIN_PSRLDQ512,
30008 IX86_BUILTIN_STOREDQUHI512_MASK,
30009 IX86_BUILTIN_STOREDQUQI512_MASK,
30010 IX86_BUILTIN_PALIGNR512,
30011 IX86_BUILTIN_PALIGNR512_MASK,
30012 IX86_BUILTIN_MOVDQUHI512_MASK,
30013 IX86_BUILTIN_MOVDQUQI512_MASK,
30014 IX86_BUILTIN_PSADBW512,
30015 IX86_BUILTIN_DBPSADBW512,
30016 IX86_BUILTIN_PBROADCASTB512,
30017 IX86_BUILTIN_PBROADCASTB512_GPR,
30018 IX86_BUILTIN_PBROADCASTW512,
30019 IX86_BUILTIN_PBROADCASTW512_GPR,
30020 IX86_BUILTIN_PMOVSXBW512_MASK,
30021 IX86_BUILTIN_PMOVZXBW512_MASK,
30022 IX86_BUILTIN_VPERMVARHI512_MASK,
30023 IX86_BUILTIN_VPERMT2VARHI512,
30024 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
30025 IX86_BUILTIN_VPERMI2VARHI512,
30026 IX86_BUILTIN_PAVGB512,
30027 IX86_BUILTIN_PAVGW512,
30028 IX86_BUILTIN_PADDB512,
30029 IX86_BUILTIN_PSUBB512,
30030 IX86_BUILTIN_PSUBSB512,
30031 IX86_BUILTIN_PADDSB512,
30032 IX86_BUILTIN_PSUBUSB512,
30033 IX86_BUILTIN_PADDUSB512,
30034 IX86_BUILTIN_PSUBW512,
30035 IX86_BUILTIN_PADDW512,
30036 IX86_BUILTIN_PSUBSW512,
30037 IX86_BUILTIN_PADDSW512,
30038 IX86_BUILTIN_PSUBUSW512,
30039 IX86_BUILTIN_PADDUSW512,
30040 IX86_BUILTIN_PMAXUW512,
30041 IX86_BUILTIN_PMAXSW512,
30042 IX86_BUILTIN_PMINUW512,
30043 IX86_BUILTIN_PMINSW512,
30044 IX86_BUILTIN_PMAXUB512,
30045 IX86_BUILTIN_PMAXSB512,
30046 IX86_BUILTIN_PMINUB512,
30047 IX86_BUILTIN_PMINSB512,
30048 IX86_BUILTIN_PMOVWB512,
30049 IX86_BUILTIN_PMOVSWB512,
30050 IX86_BUILTIN_PMOVUSWB512,
30051 IX86_BUILTIN_PMULHRSW512_MASK,
30052 IX86_BUILTIN_PMULHUW512_MASK,
30053 IX86_BUILTIN_PMULHW512_MASK,
30054 IX86_BUILTIN_PMULLW512_MASK,
30055 IX86_BUILTIN_PSLLWI512_MASK,
30056 IX86_BUILTIN_PSLLW512_MASK,
30057 IX86_BUILTIN_PACKSSWB512,
30058 IX86_BUILTIN_PACKUSWB512,
30059 IX86_BUILTIN_PSRAVV32HI,
30060 IX86_BUILTIN_PMADDUBSW512_MASK,
30061 IX86_BUILTIN_PMADDWD512_MASK,
30062 IX86_BUILTIN_PSRLVV32HI,
30063 IX86_BUILTIN_PUNPCKHBW512,
30064 IX86_BUILTIN_PUNPCKHWD512,
30065 IX86_BUILTIN_PUNPCKLBW512,
30066 IX86_BUILTIN_PUNPCKLWD512,
30067 IX86_BUILTIN_PSHUFB512,
30068 IX86_BUILTIN_PSHUFHW512,
30069 IX86_BUILTIN_PSHUFLW512,
30070 IX86_BUILTIN_PSRAWI512,
30071 IX86_BUILTIN_PSRAW512,
30072 IX86_BUILTIN_PSRLWI512,
30073 IX86_BUILTIN_PSRLW512,
30074 IX86_BUILTIN_CVTB2MASK512,
30075 IX86_BUILTIN_CVTW2MASK512,
30076 IX86_BUILTIN_CVTMASK2B512,
30077 IX86_BUILTIN_CVTMASK2W512,
30078 IX86_BUILTIN_PCMPEQB512_MASK,
30079 IX86_BUILTIN_PCMPEQW512_MASK,
30080 IX86_BUILTIN_PCMPGTB512_MASK,
30081 IX86_BUILTIN_PCMPGTW512_MASK,
30082 IX86_BUILTIN_PTESTMB512,
30083 IX86_BUILTIN_PTESTMW512,
30084 IX86_BUILTIN_PTESTNMB512,
30085 IX86_BUILTIN_PTESTNMW512,
30086 IX86_BUILTIN_PSLLVV32HI,
30087 IX86_BUILTIN_PABSB512,
30088 IX86_BUILTIN_PABSW512,
30089 IX86_BUILTIN_BLENDMW512,
30090 IX86_BUILTIN_BLENDMB512,
30091 IX86_BUILTIN_CMPB512,
30092 IX86_BUILTIN_CMPW512,
30093 IX86_BUILTIN_UCMPB512,
30094 IX86_BUILTIN_UCMPW512,
30096 /* Alternate 4 and 8 element gather/scatter for the vectorizer
30097 where all operands are 32-byte or 64-byte wide respectively. */
30098 IX86_BUILTIN_GATHERALTSIV4DF,
30099 IX86_BUILTIN_GATHERALTDIV8SF,
30100 IX86_BUILTIN_GATHERALTSIV4DI,
30101 IX86_BUILTIN_GATHERALTDIV8SI,
30102 IX86_BUILTIN_GATHER3ALTDIV16SF,
30103 IX86_BUILTIN_GATHER3ALTDIV16SI,
30104 IX86_BUILTIN_GATHER3ALTSIV4DF,
30105 IX86_BUILTIN_GATHER3ALTDIV8SF,
30106 IX86_BUILTIN_GATHER3ALTSIV4DI,
30107 IX86_BUILTIN_GATHER3ALTDIV8SI,
30108 IX86_BUILTIN_GATHER3ALTSIV8DF,
30109 IX86_BUILTIN_GATHER3ALTSIV8DI,
30110 IX86_BUILTIN_GATHER3DIV16SF,
30111 IX86_BUILTIN_GATHER3DIV16SI,
30112 IX86_BUILTIN_GATHER3DIV8DF,
30113 IX86_BUILTIN_GATHER3DIV8DI,
30114 IX86_BUILTIN_GATHER3SIV16SF,
30115 IX86_BUILTIN_GATHER3SIV16SI,
30116 IX86_BUILTIN_GATHER3SIV8DF,
30117 IX86_BUILTIN_GATHER3SIV8DI,
30118 IX86_BUILTIN_SCATTERDIV16SF,
30119 IX86_BUILTIN_SCATTERDIV16SI,
30120 IX86_BUILTIN_SCATTERDIV8DF,
30121 IX86_BUILTIN_SCATTERDIV8DI,
30122 IX86_BUILTIN_SCATTERSIV16SF,
30123 IX86_BUILTIN_SCATTERSIV16SI,
30124 IX86_BUILTIN_SCATTERSIV8DF,
30125 IX86_BUILTIN_SCATTERSIV8DI,
30128 IX86_BUILTIN_GATHERPFQPD,
30129 IX86_BUILTIN_GATHERPFDPS,
30130 IX86_BUILTIN_GATHERPFDPD,
30131 IX86_BUILTIN_GATHERPFQPS,
30132 IX86_BUILTIN_SCATTERPFDPD,
30133 IX86_BUILTIN_SCATTERPFDPS,
30134 IX86_BUILTIN_SCATTERPFQPD,
30135 IX86_BUILTIN_SCATTERPFQPS,
30138 IX86_BUILTIN_EXP2PD_MASK,
30139 IX86_BUILTIN_EXP2PS_MASK,
30140 IX86_BUILTIN_EXP2PS,
30141 IX86_BUILTIN_RCP28PD,
30142 IX86_BUILTIN_RCP28PS,
30143 IX86_BUILTIN_RCP28SD,
30144 IX86_BUILTIN_RCP28SS,
30145 IX86_BUILTIN_RSQRT28PD,
30146 IX86_BUILTIN_RSQRT28PS,
30147 IX86_BUILTIN_RSQRT28SD,
30148 IX86_BUILTIN_RSQRT28SS,
30151 IX86_BUILTIN_VPMADD52LUQ512,
30152 IX86_BUILTIN_VPMADD52HUQ512,
30153 IX86_BUILTIN_VPMADD52LUQ256,
30154 IX86_BUILTIN_VPMADD52HUQ256,
30155 IX86_BUILTIN_VPMADD52LUQ128,
30156 IX86_BUILTIN_VPMADD52HUQ128,
30157 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
30158 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
30159 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
30160 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
30161 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
30162 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
30165 IX86_BUILTIN_VPMULTISHIFTQB512,
30166 IX86_BUILTIN_VPMULTISHIFTQB256,
30167 IX86_BUILTIN_VPMULTISHIFTQB128,
30168 IX86_BUILTIN_VPERMVARQI512_MASK,
30169 IX86_BUILTIN_VPERMT2VARQI512,
30170 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
30171 IX86_BUILTIN_VPERMI2VARQI512,
30172 IX86_BUILTIN_VPERMVARQI256_MASK,
30173 IX86_BUILTIN_VPERMVARQI128_MASK,
30174 IX86_BUILTIN_VPERMT2VARQI256,
30175 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
30176 IX86_BUILTIN_VPERMT2VARQI128,
30177 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
30178 IX86_BUILTIN_VPERMI2VARQI256,
30179 IX86_BUILTIN_VPERMI2VARQI128,
30181 /* SHA builtins. */
30182 IX86_BUILTIN_SHA1MSG1,
30183 IX86_BUILTIN_SHA1MSG2,
30184 IX86_BUILTIN_SHA1NEXTE,
30185 IX86_BUILTIN_SHA1RNDS4,
30186 IX86_BUILTIN_SHA256MSG1,
30187 IX86_BUILTIN_SHA256MSG2,
30188 IX86_BUILTIN_SHA256RNDS2,
30190 /* CLWB instructions. */
30193 /* PCOMMIT instructions. */
30194 IX86_BUILTIN_PCOMMIT,
30196 /* CLFLUSHOPT instructions. */
30197 IX86_BUILTIN_CLFLUSHOPT,
30199 /* TFmode support builtins. */
30201 IX86_BUILTIN_HUGE_VALQ,
30202 IX86_BUILTIN_FABSQ,
30203 IX86_BUILTIN_COPYSIGNQ,
30205 /* Vectorizer support builtins. */
30206 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30207 IX86_BUILTIN_CPYSGNPS,
30208 IX86_BUILTIN_CPYSGNPD,
30209 IX86_BUILTIN_CPYSGNPS256,
30210 IX86_BUILTIN_CPYSGNPS512,
30211 IX86_BUILTIN_CPYSGNPD256,
30212 IX86_BUILTIN_CPYSGNPD512,
30213 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30214 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30217 /* FMA4 instructions. */
30218 IX86_BUILTIN_VFMADDSS,
30219 IX86_BUILTIN_VFMADDSD,
30220 IX86_BUILTIN_VFMADDPS,
30221 IX86_BUILTIN_VFMADDPD,
30222 IX86_BUILTIN_VFMADDPS256,
30223 IX86_BUILTIN_VFMADDPD256,
30224 IX86_BUILTIN_VFMADDSUBPS,
30225 IX86_BUILTIN_VFMADDSUBPD,
30226 IX86_BUILTIN_VFMADDSUBPS256,
30227 IX86_BUILTIN_VFMADDSUBPD256,
30229 /* FMA3 instructions. */
30230 IX86_BUILTIN_VFMADDSS3,
30231 IX86_BUILTIN_VFMADDSD3,
30233 /* XOP instructions. */
30234 IX86_BUILTIN_VPCMOV,
30235 IX86_BUILTIN_VPCMOV_V2DI,
30236 IX86_BUILTIN_VPCMOV_V4SI,
30237 IX86_BUILTIN_VPCMOV_V8HI,
30238 IX86_BUILTIN_VPCMOV_V16QI,
30239 IX86_BUILTIN_VPCMOV_V4SF,
30240 IX86_BUILTIN_VPCMOV_V2DF,
30241 IX86_BUILTIN_VPCMOV256,
30242 IX86_BUILTIN_VPCMOV_V4DI256,
30243 IX86_BUILTIN_VPCMOV_V8SI256,
30244 IX86_BUILTIN_VPCMOV_V16HI256,
30245 IX86_BUILTIN_VPCMOV_V32QI256,
30246 IX86_BUILTIN_VPCMOV_V8SF256,
30247 IX86_BUILTIN_VPCMOV_V4DF256,
30249 IX86_BUILTIN_VPPERM,
30251 IX86_BUILTIN_VPMACSSWW,
30252 IX86_BUILTIN_VPMACSWW,
30253 IX86_BUILTIN_VPMACSSWD,
30254 IX86_BUILTIN_VPMACSWD,
30255 IX86_BUILTIN_VPMACSSDD,
30256 IX86_BUILTIN_VPMACSDD,
30257 IX86_BUILTIN_VPMACSSDQL,
30258 IX86_BUILTIN_VPMACSSDQH,
30259 IX86_BUILTIN_VPMACSDQL,
30260 IX86_BUILTIN_VPMACSDQH,
30261 IX86_BUILTIN_VPMADCSSWD,
30262 IX86_BUILTIN_VPMADCSWD,
30264 IX86_BUILTIN_VPHADDBW,
30265 IX86_BUILTIN_VPHADDBD,
30266 IX86_BUILTIN_VPHADDBQ,
30267 IX86_BUILTIN_VPHADDWD,
30268 IX86_BUILTIN_VPHADDWQ,
30269 IX86_BUILTIN_VPHADDDQ,
30270 IX86_BUILTIN_VPHADDUBW,
30271 IX86_BUILTIN_VPHADDUBD,
30272 IX86_BUILTIN_VPHADDUBQ,
30273 IX86_BUILTIN_VPHADDUWD,
30274 IX86_BUILTIN_VPHADDUWQ,
30275 IX86_BUILTIN_VPHADDUDQ,
30276 IX86_BUILTIN_VPHSUBBW,
30277 IX86_BUILTIN_VPHSUBWD,
30278 IX86_BUILTIN_VPHSUBDQ,
30280 IX86_BUILTIN_VPROTB,
30281 IX86_BUILTIN_VPROTW,
30282 IX86_BUILTIN_VPROTD,
30283 IX86_BUILTIN_VPROTQ,
30284 IX86_BUILTIN_VPROTB_IMM,
30285 IX86_BUILTIN_VPROTW_IMM,
30286 IX86_BUILTIN_VPROTD_IMM,
30287 IX86_BUILTIN_VPROTQ_IMM,
30289 IX86_BUILTIN_VPSHLB,
30290 IX86_BUILTIN_VPSHLW,
30291 IX86_BUILTIN_VPSHLD,
30292 IX86_BUILTIN_VPSHLQ,
30293 IX86_BUILTIN_VPSHAB,
30294 IX86_BUILTIN_VPSHAW,
30295 IX86_BUILTIN_VPSHAD,
30296 IX86_BUILTIN_VPSHAQ,
30298 IX86_BUILTIN_VFRCZSS,
30299 IX86_BUILTIN_VFRCZSD,
30300 IX86_BUILTIN_VFRCZPS,
30301 IX86_BUILTIN_VFRCZPD,
30302 IX86_BUILTIN_VFRCZPS256,
30303 IX86_BUILTIN_VFRCZPD256,
30305 IX86_BUILTIN_VPCOMEQUB,
30306 IX86_BUILTIN_VPCOMNEUB,
30307 IX86_BUILTIN_VPCOMLTUB,
30308 IX86_BUILTIN_VPCOMLEUB,
30309 IX86_BUILTIN_VPCOMGTUB,
30310 IX86_BUILTIN_VPCOMGEUB,
30311 IX86_BUILTIN_VPCOMFALSEUB,
30312 IX86_BUILTIN_VPCOMTRUEUB,
30314 IX86_BUILTIN_VPCOMEQUW,
30315 IX86_BUILTIN_VPCOMNEUW,
30316 IX86_BUILTIN_VPCOMLTUW,
30317 IX86_BUILTIN_VPCOMLEUW,
30318 IX86_BUILTIN_VPCOMGTUW,
30319 IX86_BUILTIN_VPCOMGEUW,
30320 IX86_BUILTIN_VPCOMFALSEUW,
30321 IX86_BUILTIN_VPCOMTRUEUW,
30323 IX86_BUILTIN_VPCOMEQUD,
30324 IX86_BUILTIN_VPCOMNEUD,
30325 IX86_BUILTIN_VPCOMLTUD,
30326 IX86_BUILTIN_VPCOMLEUD,
30327 IX86_BUILTIN_VPCOMGTUD,
30328 IX86_BUILTIN_VPCOMGEUD,
30329 IX86_BUILTIN_VPCOMFALSEUD,
30330 IX86_BUILTIN_VPCOMTRUEUD,
30332 IX86_BUILTIN_VPCOMEQUQ,
30333 IX86_BUILTIN_VPCOMNEUQ,
30334 IX86_BUILTIN_VPCOMLTUQ,
30335 IX86_BUILTIN_VPCOMLEUQ,
30336 IX86_BUILTIN_VPCOMGTUQ,
30337 IX86_BUILTIN_VPCOMGEUQ,
30338 IX86_BUILTIN_VPCOMFALSEUQ,
30339 IX86_BUILTIN_VPCOMTRUEUQ,
30341 IX86_BUILTIN_VPCOMEQB,
30342 IX86_BUILTIN_VPCOMNEB,
30343 IX86_BUILTIN_VPCOMLTB,
30344 IX86_BUILTIN_VPCOMLEB,
30345 IX86_BUILTIN_VPCOMGTB,
30346 IX86_BUILTIN_VPCOMGEB,
30347 IX86_BUILTIN_VPCOMFALSEB,
30348 IX86_BUILTIN_VPCOMTRUEB,
30350 IX86_BUILTIN_VPCOMEQW,
30351 IX86_BUILTIN_VPCOMNEW,
30352 IX86_BUILTIN_VPCOMLTW,
30353 IX86_BUILTIN_VPCOMLEW,
30354 IX86_BUILTIN_VPCOMGTW,
30355 IX86_BUILTIN_VPCOMGEW,
30356 IX86_BUILTIN_VPCOMFALSEW,
30357 IX86_BUILTIN_VPCOMTRUEW,
30359 IX86_BUILTIN_VPCOMEQD,
30360 IX86_BUILTIN_VPCOMNED,
30361 IX86_BUILTIN_VPCOMLTD,
30362 IX86_BUILTIN_VPCOMLED,
30363 IX86_BUILTIN_VPCOMGTD,
30364 IX86_BUILTIN_VPCOMGED,
30365 IX86_BUILTIN_VPCOMFALSED,
30366 IX86_BUILTIN_VPCOMTRUED,
30368 IX86_BUILTIN_VPCOMEQQ,
30369 IX86_BUILTIN_VPCOMNEQ,
30370 IX86_BUILTIN_VPCOMLTQ,
30371 IX86_BUILTIN_VPCOMLEQ,
30372 IX86_BUILTIN_VPCOMGTQ,
30373 IX86_BUILTIN_VPCOMGEQ,
30374 IX86_BUILTIN_VPCOMFALSEQ,
30375 IX86_BUILTIN_VPCOMTRUEQ,
30377 /* LWP instructions. */
30378 IX86_BUILTIN_LLWPCB,
30379 IX86_BUILTIN_SLWPCB,
30380 IX86_BUILTIN_LWPVAL32,
30381 IX86_BUILTIN_LWPVAL64,
30382 IX86_BUILTIN_LWPINS32,
30383 IX86_BUILTIN_LWPINS64,
30388 IX86_BUILTIN_XBEGIN,
30390 IX86_BUILTIN_XABORT,
30391 IX86_BUILTIN_XTEST,
30394 IX86_BUILTIN_BNDMK,
30395 IX86_BUILTIN_BNDSTX,
30396 IX86_BUILTIN_BNDLDX,
30397 IX86_BUILTIN_BNDCL,
30398 IX86_BUILTIN_BNDCU,
30399 IX86_BUILTIN_BNDRET,
30400 IX86_BUILTIN_BNDNARROW,
30401 IX86_BUILTIN_BNDINT,
30402 IX86_BUILTIN_SIZEOF,
30403 IX86_BUILTIN_BNDLOWER,
30404 IX86_BUILTIN_BNDUPPER,
30406 /* BMI instructions. */
30407 IX86_BUILTIN_BEXTR32,
30408 IX86_BUILTIN_BEXTR64,
30411 /* TBM instructions. */
30412 IX86_BUILTIN_BEXTRI32,
30413 IX86_BUILTIN_BEXTRI64,
30415 /* BMI2 instructions. */
30416 IX86_BUILTIN_BZHI32,
30417 IX86_BUILTIN_BZHI64,
30418 IX86_BUILTIN_PDEP32,
30419 IX86_BUILTIN_PDEP64,
30420 IX86_BUILTIN_PEXT32,
30421 IX86_BUILTIN_PEXT64,
30423 /* ADX instructions. */
30424 IX86_BUILTIN_ADDCARRYX32,
30425 IX86_BUILTIN_ADDCARRYX64,
30427 /* SBB instructions. */
30428 IX86_BUILTIN_SBB32,
30429 IX86_BUILTIN_SBB64,
30431 /* FSGSBASE instructions. */
30432 IX86_BUILTIN_RDFSBASE32,
30433 IX86_BUILTIN_RDFSBASE64,
30434 IX86_BUILTIN_RDGSBASE32,
30435 IX86_BUILTIN_RDGSBASE64,
30436 IX86_BUILTIN_WRFSBASE32,
30437 IX86_BUILTIN_WRFSBASE64,
30438 IX86_BUILTIN_WRGSBASE32,
30439 IX86_BUILTIN_WRGSBASE64,
30441 /* RDRND instructions. */
30442 IX86_BUILTIN_RDRAND16_STEP,
30443 IX86_BUILTIN_RDRAND32_STEP,
30444 IX86_BUILTIN_RDRAND64_STEP,
30446 /* RDSEED instructions. */
30447 IX86_BUILTIN_RDSEED16_STEP,
30448 IX86_BUILTIN_RDSEED32_STEP,
30449 IX86_BUILTIN_RDSEED64_STEP,
30451 /* F16C instructions. */
30452 IX86_BUILTIN_CVTPH2PS,
30453 IX86_BUILTIN_CVTPH2PS256,
30454 IX86_BUILTIN_CVTPS2PH,
30455 IX86_BUILTIN_CVTPS2PH256,
30457 /* CFString built-in for darwin */
30458 IX86_BUILTIN_CFSTRING,
30460 /* Builtins to get CPU type and supported features. */
30461 IX86_BUILTIN_CPU_INIT,
30462 IX86_BUILTIN_CPU_IS,
30463 IX86_BUILTIN_CPU_SUPPORTS,
30465 /* Read/write FLAGS register built-ins. */
30466 IX86_BUILTIN_READ_FLAGS,
30467 IX86_BUILTIN_WRITE_FLAGS,
/* NOTE(review): this listing embeds the original file's line numbers and
   the numbering jumps (30473 -> 30475, 30485 -> 30488, ...), i.e. several
   interior lines -- including the struct's closing "};" -- were elided.
   Verify against the complete i386.c before relying on exact text.  */
30472 /* Table for the ix86 builtin decls.  */
30473 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30475 /* Table of all of the builtin functions that are possible with different ISA's
30476 but are waiting to be built until a function is declared to use that
30478 struct builtin_isa {
30479 const char *name; /* function name */
30480 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30481 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
30482 bool const_p; /* true if the declaration is constant */
30483 bool leaf_p; /* true if the declaration has leaf attribute */
30484 bool nothrow_p; /* true if the declaration has nothrow attribute */
/* Deferred-declaration flag: set true by def_builtin when the builtin is
   recorded here but its decl is not yet built; cleared by def_builtin /
   ix86_add_new_builtins once the decl exists.  */
30485 bool set_and_not_built_p;
/* One slot per IX86_BUILTIN_* code, parallel to ix86_builtins above.  */
30488 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30491 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
30492 of which isa_flags to use in the ix86_builtins_isa array. Stores the
30493 function decl in the ix86_builtins array. Returns the function decl or
30494 NULL_TREE, if the builtin was not added.
30496 If the front end has a special hook for builtin functions, delay adding
30497 builtin functions that aren't in the current ISA until the ISA is changed
30498 with function specific optimization. Doing so, can save about 300K for the
30499 default compiler. When the builtin is expanded, check at that time whether
30502 If the front end doesn't have a special hook, record all builtins, even if
30503 it isn't an instruction set in the current ISA in case the user uses
30504 function specific options for a different ISA, so that we don't get scope
30505 errors if a builtin is added in the middle of a function scope. */
/* NOTE(review): the embedded numbering jumps (30505 -> 30508, 30510 -> 30512,
   30529 -> 30533, ...): the return-type line, braces, the "else" and the
   final return were elided from this listing -- TODO confirm against the
   full source.  */
30508 def_builtin (HOST_WIDE_INT mask, const char *name,
30509 enum ix86_builtin_func_type tcode,
30510 enum ix86_builtins code)
30512 tree decl = NULL_TREE;
/* Skip builtins whose mask demands 64-bit mode when targeting 32-bit.  */
30514 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30516 ix86_builtins_isa[(int) code].isa = mask;
/* 64-bit-ness was checked above; drop it before testing ISA flags.  */
30518 mask &= ~OPTION_MASK_ISA_64BIT;
30520 || (mask & ix86_isa_flags) != 0
30521 || (lang_hooks.builtin_function
30522 == lang_hooks.builtin_function_ext_scope))
/* Builtin is in the current ISA (or the FE cannot defer): build the
   decl now and record it.  */
30525 tree type = ix86_get_builtin_func_type (tcode);
30526 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30528 ix86_builtins[(int) code] = decl;
30529 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
/* Otherwise only record the metadata; ix86_add_new_builtins will
   build the decl later if the ISA is enabled.  */
30533 ix86_builtins[(int) code] = NULL_TREE;
30534 ix86_builtins_isa[(int) code].tcode = tcode;
30535 ix86_builtins_isa[(int) code].name = name;
30536 ix86_builtins_isa[(int) code].leaf_p = false;
30537 ix86_builtins_isa[(int) code].nothrow_p = false;
30538 ix86_builtins_isa[(int) code].const_p = false;
30539 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30546 /* Like def_builtin, but also marks the function decl "const".  */
/* NOTE(review): elided lines (30547-30548, 30551, 30553, 30555) hide the
   return type, braces and the NULL check guarding the two marks below.  */
30549 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30550 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30552 tree decl = def_builtin (mask, name, tcode, code);
/* Decl was built now: mark it pure/const directly ...  */
30554 TREE_READONLY (decl) = 1;
/* ... or the decl was deferred: remember const-ness for when
   ix86_add_new_builtins builds it.  */
30556 ix86_builtins_isa[(int) code].const_p = true;
30561 /* Add any new builtin functions for a given ISA that may not have been
30562 declared. This saves a bit of space compared to adding all of the
30563 declarations to the tree, even if we didn't use them. */
/* NOTE(review): listing elides the return-type line, local declarations
   of i/type/decl, braces, and the trailing argument(s) of
   add_builtin_function_ext_scope -- confirm against the full source.  */
30566 ix86_add_new_builtins (HOST_WIDE_INT isa)
/* Scan every recorded builtin and materialize those whose ISA just
   became available and whose decl was deferred by def_builtin.  */
30570 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
30572 if ((ix86_builtins_isa[i].isa & isa) != 0
30573 && ix86_builtins_isa[i].set_and_not_built_p)
30577 /* Don't define the builtin again. */
30578 ix86_builtins_isa[i].set_and_not_built_p = false;
30580 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30581 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30582 type, i, BUILT_IN_MD, NULL,
/* Re-apply the attributes def_builtin* recorded in the isa table.  */
30585 ix86_builtins[i] = decl;
30586 if (ix86_builtins_isa[i].const_p)
30587 TREE_READONLY (decl) = 1;
30588 if (ix86_builtins_isa[i].leaf_p)
30589 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30591 if (ix86_builtins_isa[i].nothrow_p)
30592 TREE_NOTHROW (decl) = 1;
30597 /* Bits for builtin_description.flag.  */
30599 /* Set when we don't support the comparison natively, and should
30600 swap_comparison in order to support it. */
30601 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One row per builtin in the bdesc_* tables below: ISA mask gating the
   builtin, the insn pattern to emit, the user-visible name, the
   IX86_BUILTIN_* code, and an rtx comparison code where applicable.
   NOTE(review): the numbering jumps past 30610-30612 -- the "flag" field
   referenced by BUILTIN_DESC_SWAP_OPERANDS and the closing "};" were
   elided from this listing.  */
30603 struct builtin_description
30605 const HOST_WIDE_INT mask;
30606 const enum insn_code icode;
30607 const char *const name;
30608 const enum ix86_builtins code;
30609 const enum rtx_code comparison;
/* Scalar FP compare builtins (SSE comiss/ucomiss, SSE2 comisd/ucomisd);
   the rtx_code column records the comparison each builtin implements.
   NOTE(review): opening "{" (30614) and closing "};" were elided from
   this listing.  */
30613 static const struct builtin_description bdesc_comi[] =
30615 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30616 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30617 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30618 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30619 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30620 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30621 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30622 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30623 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30624 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30625 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30626 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30627 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30628 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30629 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30630 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30631 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30632 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30633 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30634 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30635 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30636 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30637 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30638 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* SSE4.2 pcmpestri/pcmpestrm (explicit-length string compare) builtins;
   the last column selects which EFLAGS condition-code mode the _a/_c/_o/
   _s/_z variants read.  NOTE(review): opening "{" and closing "};" were
   elided from this listing.  */
30641 static const struct builtin_description bdesc_pcmpestr[] =
30644 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30645 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30646 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30647 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30648 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30649 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30650 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
/* SSE4.2 pcmpistri/pcmpistrm (implicit-length string compare) builtins;
   mirrors bdesc_pcmpestr above, CC mode in the last column for the flag
   variants.  NOTE(review): opening "{" and closing "};" were elided from
   this listing.  */
30653 static const struct builtin_description bdesc_pcmpistr[] =
30656 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30657 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30658 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30659 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30660 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30661 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30662 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
30665 /* Special builtins with variable number of arguments. */
30666 static const struct builtin_description bdesc_special_args[] =
30668 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30669 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30670 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
30672 /* 80387 (for use internally for atomic compound assignment). */
30673 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
30674 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
30675 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
30676 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
30679 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30682 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30684 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
30685 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
30686 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
30687 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30688 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30689 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30690 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30691 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30692 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30694 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30695 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30696 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30697 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30698 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30699 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30700 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30701 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30704 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30705 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30706 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30708 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30709 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30710 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30711 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30713 /* SSE or 3DNow!A */
30714 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30715 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
30718 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30719 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30720 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30721 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
30722 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30723 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
30724 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
30725 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
30726 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
30727 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30729 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30730 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30733 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30736 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
30739 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30740 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
30744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
30746 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30747 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30748 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30749 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
30750 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
30752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30753 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30754 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30755 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30756 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30757 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
30758 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30760 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
30761 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30762 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30764 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
30765 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
30766 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
30767 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
30768 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
30769 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
30770 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
30771 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
30774 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
30775 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
30776 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
30777 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
30778 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
30779 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
30780 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
30781 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
30782 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
30785 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30786 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30787 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30788 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30789 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30790 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30791 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30792 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30793 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30794 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30795 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30796 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30797 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30798 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30799 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30800 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30801 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30802 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30803 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30804 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30805 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
30806 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
30807 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
30808 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
30809 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30810 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30811 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30812 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30813 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30814 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30818 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30819 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30823 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30831 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30833 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
30834 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
30835 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
30836 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
30837 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
30838 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
30841 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30842 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30843 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30844 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30845 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30846 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30847 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30848 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30851 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30852 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
30853 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
30856 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
30857 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
30858 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
30859 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
30862 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
30863 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
30864 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
30865 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
30866 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30867 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30868 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30869 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30870 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30871 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30872 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30873 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30874 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30875 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30876 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30877 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30878 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30879 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30880 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30881 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30882 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30883 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30884 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30885 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30886 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30887 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30888 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30889 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30890 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30891 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30892 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30893 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30894 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30895 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30896 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30897 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30898 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
30899 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
30900 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
30901 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
30902 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30903 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30904 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30905 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30906 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30907 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30908 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30909 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30910 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30911 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30912 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30913 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30914 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30915 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30916 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30917 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30918 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30919 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30920 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30921 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30922 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30923 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30924 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30925 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30926 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30927 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30928 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30929 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30930 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30931 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30932 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30933 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30934 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30935 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30936 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30937 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30938 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30939 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30940 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30941 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30942 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30943 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30944 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30945 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30946 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30947 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30948 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30949 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30950 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30951 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30952 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30953 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30954 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30955 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30958 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
30961 /* Builtins with variable number of arguments. */
30962 static const struct builtin_description bdesc_args[] =
30964 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
30965 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
30966 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
30967 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
30968 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
30969 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
30970 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
30973 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30974 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30975 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30976 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30977 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30978 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30980 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30981 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30982 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30983 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30984 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30985 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30986 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30987 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30989 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30990 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30992 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30993 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30994 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30995 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30997 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30998 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30999 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31000 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31001 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31002 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31004 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31005 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31006 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31007 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31008 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
31009 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
31011 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31012 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
31013 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31015 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
31017 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31018 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31019 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31020 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31021 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31022 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31024 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31025 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31026 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31027 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31028 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31029 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31031 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31032 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31033 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31034 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31037 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31038 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31039 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31040 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31042 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31043 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31044 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31045 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31046 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31047 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31048 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31049 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31050 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31051 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31052 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31053 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31054 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31055 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31056 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31059 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31060 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31061 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31062 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31063 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31064 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31067 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
31068 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31069 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31070 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31071 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31072 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31073 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31074 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31075 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31076 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31077 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31078 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31080 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31082 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31083 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31084 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31085 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31086 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31087 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31088 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31089 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31091 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31092 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31093 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31094 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31095 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31096 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31097 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31098 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31099 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31100 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31101 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
31102 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31103 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31104 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31105 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31106 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31107 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31108 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31109 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31110 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31112 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31113 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31114 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31115 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31117 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31118 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31119 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31120 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31122 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31124 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31125 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31126 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31127 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31128 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31130 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
31131 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
31132 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
31134 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
31136 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31137 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31138 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31140 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
31141 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
31143 /* SSE builtins that are also available with MMX or 3DNow!A.  */
31144 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31145 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31146 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31148 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31149 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31150 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31151 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31153 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
31154 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
31156 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
31159 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31161 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
31162 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
31163 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31164 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
31165 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
31167 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31168 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31169 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
31170 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31171 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31173 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
31175 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31176 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31177 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31178 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31180 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31181 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31182 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31184 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31185 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31186 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31187 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31188 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31189 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31190 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31191 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31193 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31194 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31195 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31196 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31197 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
31198 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31199 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31200 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31201 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31202 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31203 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31204 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31205 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31206 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31207 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31208 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31209 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31210 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31211 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31212 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31214 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31215 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31216 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31217 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31219 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31220 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31221 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31222 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31224 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31226 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31227 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31228 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31230 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31232 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31233 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31234 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31235 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31236 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31237 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31238 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31239 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31241 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31242 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31243 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31244 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31245 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31246 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31247 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31248 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31250 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31251 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
31253 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31254 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31255 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31256 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31258 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31259 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31261 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31262 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31263 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31264 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31265 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31266 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31268 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31269 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31270 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31271 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31273 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31274 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31275 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31276 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31277 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31278 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31279 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31280 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31282 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31283 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31284 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31286 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31287 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
31289 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
31290 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31292 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
31294 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
31295 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
31296 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
31297 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
31299 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31300 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31301 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31302 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31303 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31304 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31305 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31307 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31308 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31309 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31310 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31311 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31312 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31313 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31315 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31316 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31317 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31318 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31320 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
31321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31324 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
31326 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31329 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31330 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31333 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
31334 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31336 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31337 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31338 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31339 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31340 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31341 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31344 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31345 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
31346 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31347 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
31348 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31349 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31351 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31352 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31353 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31354 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31355 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31356 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31357 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31358 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31359 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31360 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31361 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31362 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31363 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
31364 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
31365 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31366 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31367 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31368 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31369 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31370 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31371 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31372 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31373 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31374 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31377 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
31378 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
31381 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31382 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31383 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
31384 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
31385 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31386 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31387 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31388 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
31389 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
31390 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
31392 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31393 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31394 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31395 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31396 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31397 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31398 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31399 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31400 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31401 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31402 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31403 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31404 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31406 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31407 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31408 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31409 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31410 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31411 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31412 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31413 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31414 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31415 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31416 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31417 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31420 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31421 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31422 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31423 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31425 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
31426 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
31427 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
31428 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
31430 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31431 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31433 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31434 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31436 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
31437 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
31438 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
31439 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
31441 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
31442 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
31444 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31445 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31447 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31448 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31449 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31452 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31453 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
31454 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
31455 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31456 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31459 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
31460 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
31461 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
31462 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31465 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
31466 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31468 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31469 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31470 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31471 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31474 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
31477 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31478 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31479 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31480 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31481 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31482 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31483 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31484 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31485 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31486 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31487 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31488 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31489 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31490 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31491 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31492 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31493 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31494 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31495 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31496 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31497 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31498 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31499 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31500 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31501 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31502 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31504 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
31505 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
31506 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
31507 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31509 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31510 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31511 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31512 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31513 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31514 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31515 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31516 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31517 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31518 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31519 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31520 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31521 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31522 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31523 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31524 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31525 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31526 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31527 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31528 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31529 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31530 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31531 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31532 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31533 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31534 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31535 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31536 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31537 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31538 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31539 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31540 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31541 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31542 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31544 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31545 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31546 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31548 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31549 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31550 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31551 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31552 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31554 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31556 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31557 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31559 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31560 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31561 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31562 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31564 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31565 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31567 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31568 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31570 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31571 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31572 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31573 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31575 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31576 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31578 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31579 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31581 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31582 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31583 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31584 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31586 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31587 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31588 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31589 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31590 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31591 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
31593 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31594 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31595 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31596 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31597 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31598 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31599 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31600 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31601 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31602 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31603 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31604 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31605 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31606 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31607 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31609 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31610 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31612 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31613 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31615 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31618 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31619 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31620 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31621 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31622 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31623 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31624 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31625 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31626 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31627 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31628 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31629 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31630 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31631 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31632 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31633 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31634 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31635 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31636 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31637 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31638 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31639 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31640 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31641 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31642 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31643 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31644 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31645 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31646 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31647 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31648 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31649 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31650 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31651 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31652 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31653 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31654 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31655 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31656 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31657 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31658 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31659 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31660 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31661 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31662 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31663 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31664 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31665 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31666 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31667 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31668 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31669 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31670 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31671 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31672 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31673 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31674 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31675 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31676 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31677 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31678 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31679 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31680 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31681 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31682 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31683 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31684 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31685 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31686 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31687 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31688 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31689 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31690 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31691 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31692 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
31693 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31694 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31695 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31696 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31697 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31698 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31699 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31700 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31701 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31702 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31703 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31704 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31705 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31706 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31707 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31708 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31709 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31710 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31711 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31712 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31713 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31714 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31715 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31716 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31717 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31718 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31719 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31720 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31721 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31722 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31723 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31724 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31725 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31726 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31727 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31728 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31729 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31730 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31731 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31732 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31733 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31734 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31735 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31736 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31737 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31738 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31739 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
31740 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
31741 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31742 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31743 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31744 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31745 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31746 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31747 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31748 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31749 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31750 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
31751 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
31752 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
31753 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
31754 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31755 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31756 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31757 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31758 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31759 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31760 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31761 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31762 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31763 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31765 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31768 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31769 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31770 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31773 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31774 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31777 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
31778 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
31779 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
31780 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
31783 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31784 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31785 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31786 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31787 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31788 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31791 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
31792 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
31793 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
31794 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
31795 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
31796 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
31797 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31798 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31799 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31800 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31801 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31802 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31803 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31804 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
31805 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31806 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
31807 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
31808 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31809 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31810 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31811 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31812 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31813 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31814 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
31815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
31817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31818 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31819 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
31822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
31823 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
31824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
31825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
31826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
31827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
31828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
31829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31831 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31833 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31835 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31836 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31837 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31841 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31842 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31843 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31844 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
31846 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
31847 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
31848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
31849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
31850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31862 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31864 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31866 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31867 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31868 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31869 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31870 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31871 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31872 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31873 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31874 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31875 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31876 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31877 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31878 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31879 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31880 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31881 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31882 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31883 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31884 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31885 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31886 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31887 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31888 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31889 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31890 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31891 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31892 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31893 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31894 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31895 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31896 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31897 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31898 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31899 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31900 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31901 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31902 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31903 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31904 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31905 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31906 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31907 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31908 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31909 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31910 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31911 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31912 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31913 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31914 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31915 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31957 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31958 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31959 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31960 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
31962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
31968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
31969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
31974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
31975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
31976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
31977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31982 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
31984 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
31985 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
31986 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
31988 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
31989 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
31990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
31991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
31992 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
31993 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
31994 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
31995 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
31997 /* Mask arithmetic operations */
31998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
32001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
32005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
32010 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32011 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32012 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32013 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
32014 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32015 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32016 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
32019 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
32020 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
32021 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32022 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32023 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32024 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32025 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32026 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32027 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32028 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32029 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32030 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32031 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32032 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32033 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32034 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32035 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32036 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32037 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32038 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32039 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32040 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32041 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32042 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32043 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32044 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32045 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32046 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32047 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32048 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32049 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32050 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32051 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32052 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32053 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32054 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32055 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32056 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32057 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32058 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32059 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32060 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32061 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32062 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32063 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32064 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32065 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32066 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32067 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32068 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32069 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32070 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32071 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32072 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
32073 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
32074 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
32075 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
32076 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32077 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32078 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32079 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32080 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32081 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32082 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32083 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32084 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32085 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32086 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32087 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32088 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32089 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32090 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32091 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32092 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32093 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32094 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32095 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32096 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32097 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32098 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32099 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32100 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32101 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32102 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32103 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32104 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32105 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32106 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32107 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32108 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
32109 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
32110 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
32111 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
32112 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32113 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
32114 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
32115 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
32116 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32117 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
32118 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32119 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
32120 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32121 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
32122 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32123 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32124 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32125 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
32126 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32127 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32128 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32129 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
32130 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
32131 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
32132 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
32133 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32134 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32135 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32136 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32137 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32138 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32139 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32140 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32141 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32142 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32143 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32144 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32145 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32146 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32147 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32148 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32149 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32150 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32151 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32152 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32153 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32154 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32155 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32156 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32157 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32158 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32159 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32160 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32161 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32162 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32163 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32164 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32165 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32166   /* Fix: OPTION_MASK_ISA_AVX512BW was OR'd with itself; a single occurrence
	   matches the sibling "_mask" entry and has identical semantics.  */
32166 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32167 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32168   /* Fix: OPTION_MASK_ISA_AVX512BW was OR'd with itself; a single occurrence
	   matches the sibling "_mask" entry and has identical semantics.  */
32168 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32169 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32170 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32171 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32172 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32173 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32174 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32175 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32176 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32177 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32178 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32179 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32180 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32181 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32182 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32183 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32184 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32185 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32186 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32187 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32188 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32189 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32190 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32191 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32192 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32193 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32194 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32195 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32196 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32197 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32198 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32199 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32200 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32201 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32202 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32203 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32204 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32205 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32206 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32207 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32208 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32209 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32210 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32211 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32212 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32213 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32214 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32215 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32216 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32217 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32218 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32219 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32220 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32221 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32222 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32223 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32224 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32225 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32226 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32227 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32228 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32229 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32230 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32231 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32232 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32233 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32234 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32235 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32236 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32237 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32238 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32239 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32240 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32241 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32242 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32243 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32244 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32245 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32246 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32247 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32248 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32249 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32250 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32251 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32252 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32253 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32254 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32255 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32256 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32257 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32258 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32259 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32260 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32261 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32262 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32263 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32264 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32265 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32266 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32267 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
/* Continuation of a builtin-description (bdesc) table whose declaration
   starts before and ends after this region.  Each entry appears to be
   { ISA option mask, insn code (CODE_FOR_*), builtin name exposed to
   user code, IX86_BUILTIN_* enumerator, comparison code (UNKNOWN for
   non-compare operations), (int) prototype enumerator }.
   NOTE(review): field meanings inferred from entry shape and naming —
   confirm against the struct builtin_description declaration earlier
   in this file.  The name strings are user-visible builtins; entries
   below are intentionally left byte-identical.  */
/* AVX512VL fixupimm, 128-bit mask/maskz variants.  */
32268 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32269 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32270 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
/* Masked integer absolute value (vpabsd/vpabsq).  */
32271 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32272 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32273 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32274 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
/* AVX512BW+VL masked 16-bit multiplies (high/low/rounded-scaled).  */
32275 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask , "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32276 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32277 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32278 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask" , IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32279 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask" , IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32280 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32281 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask" , IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32282 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
/* AVX512DQ+VL masked 64-bit low multiply (vpmullq).  */
32283 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32284 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
/* AVX512DQ+VL masked FP bitwise and / andnot.  */
32285 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32286 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32287 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32288 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32289 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32290 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32291 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32292 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
/* Masked shifts: left logical (psll*), arithmetic right (psra*),
   logical right (psrl*).  The *_INT_* prototypes are the immediate-count
   forms, the *_V*_* prototypes take the count in a vector operand.  */
32293 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32294 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32295 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32296 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32297 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32298 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32299 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK , UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32300 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32301 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32302 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32303 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32304 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32305 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32306 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32307 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32308 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32309 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32310 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32311 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32312 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
/* Masked integer bitwise AND (dword forms).  */
32313 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32314 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
/* Masked logical right shifts (psrl*).  */
32315 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32316 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32317 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32318 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32319 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32320 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32321 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32322 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
/* Masked integer bitwise AND/ANDN/OR/XOR (qword and dword forms).  */
32323 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32324 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32325 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32326 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32327 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32328 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32329 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32330 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32331 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32332 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32333 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32334 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32335 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32336 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
/* Masked saturating word->byte packs.  */
32337 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32338 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32339 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32340 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
/* Masked vrndscale (round to specified precision with imm control).  */
32341 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32342 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32343 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32344 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
/* Masked vpternlog (3-operand ternary bitwise logic), mask/maskz.  */
32345 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32346 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32347 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32348 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32349 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32350 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32351 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32352 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
/* Masked vscalef (x * 2^floor(y)).  */
32353 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32354 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32355 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32356 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
/* Masked FMA family: fmadd/fmsub/fnmadd/fnmsub/fmaddsub/fmsubadd,
   each in _mask (merge into op1), _mask3 (merge into op3) and _maskz
   (zero-masking) variants where defined.  */
32357 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32358 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32359 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32360 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32361 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32362 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32363 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32364 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32365 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32366 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32367 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32368 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32369 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32370 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32371 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32372 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32373 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32374 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32375 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32376 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32377 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32378 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32379 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32380 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32381 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32382 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32383 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32384 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32385 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32386 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32387 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32388 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32389 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32390 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32391 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32392 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32393 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32394 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32395 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32396 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32397 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32398 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32399 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32400 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
/* AVX512DQ+VL 128-bit lane insert into 256-bit vector.  */
32401 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
32402 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
/* AVX512BW+VL masked per-element variable word shifts and pmadd.  */
32403 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32404 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32405 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
32406 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
32407 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
32408 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
32409 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32410 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
/* Masked float -> (un)signed dword/qword conversions.  */
32411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32412 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32413 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32415 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32416 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32417 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32418 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
/* Masked vgetmant (extract normalized mantissa, imm control).  */
32419 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32420 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32421 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32422 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
/* Masked duplicate-element moves (movddup/movshdup/movsldup).  */
32423 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32424 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32425 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32426 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32427 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32428 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
/* AVX512DQ+VL masked (un)signed qword -> float conversions.  */
32429 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32430 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32431 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32432 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32433 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32434 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32435 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32436 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
/* Masked two-source permutes: vpermt2* (indices in operand 1) and
   vpermi2* (indices in operand 2) — note the differing first
   prototype argument accordingly.  */
32437 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32438 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32439 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32440 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32441 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32442 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32443 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32444 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32447 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32448 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32449 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32450 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32451 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32452 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32453 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32454 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32455 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32456 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32457 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32458 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32459 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32460 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
/* AVX512BW+VL masked byte shuffle (vpshufb).  */
32461 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32462 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32463 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32464 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32465 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32466 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32467 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32468 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32469 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32470 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32471 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32472 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32473 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32474 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32475 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32476 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32477 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32478 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32479 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32480 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32481 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32482 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32483 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32484 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32485 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32486 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32487 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32488 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32489 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32490 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32491 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32492 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32493 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32494 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32495 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32496 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32497 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32498 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32499 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32500 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32501 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32502 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32503 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32504 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32505 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32506 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32507 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32508 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32509 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
32510 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
32511 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32512 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
32513 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
32514 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
32515 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
32516 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
32517 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
32518 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
32519 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
32520 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
32521 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
32522 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
32523 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
32524 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
32525 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
32526 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
32527 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
32528 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
32529 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
32530 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
32531 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32532 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32533 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32534 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32535 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32536 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32537 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32538 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32539 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32540 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32541 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32542 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32543 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32544 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32545 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32546 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32547 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32548 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32549 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32550 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32551 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32552 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32553 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32554 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32555 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32556 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32557 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32558 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32559 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32560 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32561 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32562 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32563 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
32564 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
32565 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
32566 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
32567 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32568 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32569 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32570 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32571 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32572 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32573 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32574 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32575 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32576 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32577 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32578 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32579 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32580 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32581 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32582 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32583 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32584 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32585 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32586 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32587 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32588 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32589 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32590 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32591 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32592 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32593 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32594 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32595 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32596 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32597 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32598 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32599 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32600 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32601 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32602 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32603 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32604 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32605 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32606 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32607 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32608 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32609 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32610 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32611 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32612 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32613 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32614 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32615 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32616 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32617 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32618 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32619 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32620 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32621 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32622 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32623 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32624 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32625 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32626 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32627 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32628 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32629 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32630 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32631 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32632 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32633 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32634 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32635 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32636 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32637 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32638 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32639 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32640 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32641 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
32642 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
32643 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
32644 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
32645 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
32646 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
32647 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32648 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32649 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32650 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32651 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32652 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32653 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32654 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32655 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32656 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32657 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32658 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32659 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32660 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32661 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32662 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32663 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32664 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32665 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32666 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32667 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32668 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32669 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32670 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32671 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32672 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32673 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32674 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32675 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32676 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32677 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32678 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32679 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32680 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32681 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32682 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32683 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32684 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32685 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32686 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32687 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32688 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32689 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32690 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32691 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32692 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32693 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32694 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32695 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32696 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32697 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32698 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32699 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32700 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32701 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32702 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32703 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32704 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32705 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32706 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
32707 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
32708 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32709 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32710 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32711 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32712 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32713 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32714 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32715 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32716 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32717 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32718 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32719 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
32720 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
32721 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32722 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32723 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32724 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32725 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32726 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32727 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32728 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32729 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
32730 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
32733 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32734 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32735 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
32736 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
32737 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
32738 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
32739 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
32740 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
32741 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
32742 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
32743 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32744 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32745 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32746 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32747 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32748 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32749 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32750 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32751 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32752 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI},
32753 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32754 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
32755 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
32756 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
32757 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
32758 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
32759 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
32760 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
32761 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
32762 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
32763 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
32766 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
32767 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
32768 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32769 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32770 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32771 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32772 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
32773 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
32774 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32775 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32776 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
32777 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
32778 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
32779 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
32780 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
32781 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
32782 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32783 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32784 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32785 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32786 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32787 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32788 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32789 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32790 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32791 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32792 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32793 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32794 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32795 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32796 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32797 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32798 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32799 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32800 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32801 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32802 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32803 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32804 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32805 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32806 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32807 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32808 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32809 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32810 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32811 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32812 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32813 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32814 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask" , IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32815 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask" , IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32816 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32817 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32818 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32819 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32820 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32821 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32822 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
32823 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
32824 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32825 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32826 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32827 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32828 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32829 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32830 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32831 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32832 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32833 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32834 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32835 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32836 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
32837 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
32838 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
32839 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
32840 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32841 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32842 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32843 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32844 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32845 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32846 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32847 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32848 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32849 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32850 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32851 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32852 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32853 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32854 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32855 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32856 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
/* AVX-512IFMA 52-bit integer fused multiply-add built-ins
   (VPMADD52LUQ / VPMADD52HUQ) in 512-, 256- and 128-bit widths,
   each with masked (_mask) and zero-masked (_maskz) forms.  The
   256/128-bit variants additionally require AVX512VL.
   NOTE(review): the "vpamdd52" spelling in the CODE_FOR_ names looks
   transposed (presumably "vpmadd52"); these names must match the insn
   pattern names in sse.md exactly, so confirm against sse.md before
   renaming anything here.  */
32859 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32860 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32861 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32862 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32863 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32864 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32865 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32866 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32867 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32868 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32869 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32870 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32873 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32874 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32875 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32876 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32877 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32878 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32879 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32880 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32881 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32882 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
/* VPERMT2B zero-masked (_maskz) variants for 256/128-bit vectors.
   Fix: OPTION_MASK_ISA_AVX512VBMI was OR'ed in twice on the _maskz
   entries; that is behaviorally harmless (bitwise OR is idempotent)
   but inconsistent with every sibling entry in this group, so the
   duplicate is dropped.  ISA requirements are unchanged:
   AVX512VBMI + AVX512VL.  */
32883 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32884 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32885 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32886 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32887 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32890 /* Builtins with rounding support. */
32891 static const struct builtin_description bdesc_round_args[] =
32894 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32895 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32896 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32897 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32898 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
32899 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
32900 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
32901 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
32902 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
32903 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
32904 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32905 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32906 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
32907 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32908 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
32909 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32910 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
32911 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32912 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
32913 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
32914 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
32915 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
32916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
32917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32922 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
32923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
32924 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
32925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
32936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
32937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
32942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
32943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
32958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
32959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
32974 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
32975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
32976 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
32977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
32978 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
32979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
32980 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
32981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
32982 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
32983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
32984 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
32985 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
32986 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
32987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
32988 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
32989 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32992 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32993 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32994 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32995 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
32996 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
32997 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33015 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33016 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33017 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33018 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33019 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33020 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33021 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33022 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33023 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33024 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33027 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33028 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33029 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33030 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33031 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33032 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33033 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33034 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33035 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33036 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33037 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33038 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33039 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33040 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33041 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
33042 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
33045 /* Builtins for MPX.  */
33046 static const struct builtin_description bdesc_mpx[] =
/* MPX builtin descriptions for builtins that have side effects.
   Entry fields: ISA option mask, insn code ((enum insn_code)0 here —
   these builtins are expanded specially rather than through a named
   insn pattern), builtin name, builtin enum value, rtx comparison
   code (UNKNOWN — unused for these entries), and the function
   prototype enumerator cast to int.  */
33048 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
33049 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33050 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33053 /* Const builtins for MPX.  */
33054 static const struct builtin_description bdesc_mpx_const[] =
/* MPX builtin descriptions for side-effect-free ("const") builtins.
   Field layout matches bdesc_mpx: ISA mask, insn code (0 — expanded
   specially, no named pattern), builtin name, builtin enum value,
   rtx code (UNKNOWN, unused), and the function prototype enumerator.  */
33056 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
33057 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
33058 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
33059 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
33060 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
33061 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
33062 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
33063 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
33066 /* FMA4 and XOP.  */
/* Shorthand aliases mapping MULTI_ARG_* names to the ix86 function
   prototype enumerators used by the bdesc_multi_arg table below.
   Naming scheme: MULTI_ARG_<nargs>_<element-type>[<qualifier>], where
   a "2" appended to the element type (SF2, DF2, DI2, SI2, HI2, QI2)
   selects the 256-bit vector variant of the corresponding 128-bit
   type.  */
33067 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
33068 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
33069 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
33070 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
33071 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
33072 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
33073 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
33074 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
33075 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
33076 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
33077 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
33078 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
33079 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
33080 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
33081 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
33082 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
33083 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
33084 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
33085 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
33086 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
33087 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
33088 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
33089 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
33090 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
/* _IMM variants: second operand is an immediate shift/rotate count.  */
33091 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
33092 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
33093 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
33094 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
/* _CMP variants: two-operand comparisons; the builtin-description
   table supplies the rtx comparison code (EQ, NE, LT, ...).  */
33095 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
33096 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
33097 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
33098 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
/* _TF variants: used by the vpcomfalse*/vpcomtrue* entries, whose
   comparison field carries PCOM_FALSE / PCOM_TRUE.  */
33099 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
33100 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
33101 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
33102 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
33103 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
33104 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
33105 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
33106 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
33107 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
33108 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
33109 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
33110 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
33111 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
33112 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
/* One-operand widening forms: _<src>_<dst> maps the source element
   type to a wider result type (e.g. QI_HI is V16QI -> V8HI).  */
33113 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
33114 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
33115 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
33116 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
33117 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
33118 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
33120 static const struct builtin_description bdesc_multi_arg[] =
33122 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
33123 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
33124 UNKNOWN, (int)MULTI_ARG_3_SF },
33125 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
33126 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
33127 UNKNOWN, (int)MULTI_ARG_3_DF },
33129 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
33130 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
33131 UNKNOWN, (int)MULTI_ARG_3_SF },
33132 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
33133 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
33134 UNKNOWN, (int)MULTI_ARG_3_DF },
33136 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
33137 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
33138 UNKNOWN, (int)MULTI_ARG_3_SF },
33139 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
33140 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
33141 UNKNOWN, (int)MULTI_ARG_3_DF },
33142 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
33143 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
33144 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33145 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
33146 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
33147 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33149 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
33150 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
33151 UNKNOWN, (int)MULTI_ARG_3_SF },
33152 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
33153 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
33154 UNKNOWN, (int)MULTI_ARG_3_DF },
33155 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
33156 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
33157 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33158 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
33159 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
33160 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33162 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
33163 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
33164 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
33165 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
33166 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
33167 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
33168 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
33170 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33171 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33172 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
33173 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
33174 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
33175 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
33176 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
33178 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
33180 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33181 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33182 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33183 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33184 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33185 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33186 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33187 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33188 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33189 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33190 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33191 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33193 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33194 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
33195 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
33196 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
33197 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
33198 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
33199 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
33200 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
33201 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33202 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
33203 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
33204 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
33205 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33206 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
33207 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
33208 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
33210 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
33211 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
33212 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
33213 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
33214 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
33215 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
33217 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33218 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33219 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33220 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33221 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33222 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33223 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33224 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33225 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33226 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33227 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33228 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33229 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33230 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33231 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33233 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
33234 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33235 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33236 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
33237 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
33238 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
33239 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
33241 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
33242 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33243 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33244 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
33245 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
33246 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
33247 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
33249 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
33250 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33251 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33252 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
33253 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
33254 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
33255 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
33257 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33258 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33259 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33260 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
33261 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
33262 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
33263 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
33265 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
33266 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33267 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33268 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
33269 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
33270 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
33271 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
33273 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
33274 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33275 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33276 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
33277 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
33278 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
33279 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
33281 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
33282 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33283 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33284 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
33285 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
33286 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
33287 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
33289 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33290 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33291 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33292 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
33293 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
33294 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
33295 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
33297 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33298 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33299 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33300 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33301 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33302 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33303 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33304 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33306 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33307 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33308 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33309 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33310 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33311 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33312 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33313 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33315 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33316 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33317 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33318 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
33322 /* TM vector builtins. */
33324 /* Reuse the existing x86-specific `struct builtin_description' cause
33325 we're lazy. Add casts to make them fit. */
33326 static const struct builtin_description bdesc_tm[] =
33328 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33329 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33330 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33331 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33332 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33333 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33334 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33336 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33337 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33338 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33339 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33340 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33341 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33342 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33344 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33345 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33346 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33347 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33348 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33349 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33350 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33352 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33353 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33354 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
33357 /* TM callbacks. */
33359 /* Return the builtin decl needed to load a vector of TYPE. */
33362 ix86_builtin_tm_load (tree type)
33364 if (TREE_CODE (type) == VECTOR_TYPE)
33366 switch (tree_to_uhwi (TYPE_SIZE (type)))
33369 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
33371 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
33373 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33379 /* Return the builtin decl needed to store a vector of TYPE. */
33382 ix86_builtin_tm_store (tree type)
33384 if (TREE_CODE (type) == VECTOR_TYPE)
33386 switch (tree_to_uhwi (TYPE_SIZE (type)))
33389 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
33391 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
33393 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33399 /* Initialize the transactional memory vector load/store builtins. */
33402 ix86_init_tm_builtins (void)
33404 enum ix86_builtin_func_type ftype;
33405 const struct builtin_description *d;
33408 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33409 tree attrs_log, attrs_type_log;
33414 /* If there are no builtins defined, we must be compiling in a
33415 language without trans-mem support. */
33416 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
33419 /* Use whatever attributes a normal TM load has. */
33420 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33421 attrs_load = DECL_ATTRIBUTES (decl);
33422 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33423 /* Use whatever attributes a normal TM store has. */
33424 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33425 attrs_store = DECL_ATTRIBUTES (decl);
33426 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33427 /* Use whatever attributes a normal TM log has. */
33428 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33429 attrs_log = DECL_ATTRIBUTES (decl);
33430 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33432 for (i = 0, d = bdesc_tm;
33433 i < ARRAY_SIZE (bdesc_tm);
33436 if ((d->mask & ix86_isa_flags) != 0
33437 || (lang_hooks.builtin_function
33438 == lang_hooks.builtin_function_ext_scope))
33440 tree type, attrs, attrs_type;
33441 enum built_in_function code = (enum built_in_function) d->code;
33443 ftype = (enum ix86_builtin_func_type) d->flag;
33444 type = ix86_get_builtin_func_type (ftype);
33446 if (BUILTIN_TM_LOAD_P (code))
33448 attrs = attrs_load;
33449 attrs_type = attrs_type_load;
33451 else if (BUILTIN_TM_STORE_P (code))
33453 attrs = attrs_store;
33454 attrs_type = attrs_type_store;
33459 attrs_type = attrs_type_log;
33461 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33462 /* The builtin without the prefix for
33463 calling it directly. */
33464 d->name + strlen ("__builtin_"),
33466 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
33467 set the TYPE_ATTRIBUTES. */
33468 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
33470 set_builtin_decl (code, decl, false);
33475 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
33476 in the current target ISA to allow the user to compile particular modules
33477 with different target specific options that differ from the command line
33480 ix86_init_mmx_sse_builtins (void)
33482 const struct builtin_description * d;
33483 enum ix86_builtin_func_type ftype;
33486 /* Add all special builtins with variable number of operands. */
33487 for (i = 0, d = bdesc_special_args;
33488 i < ARRAY_SIZE (bdesc_special_args);
33494 ftype = (enum ix86_builtin_func_type) d->flag;
33495 def_builtin (d->mask, d->name, ftype, d->code);
33498 /* Add all builtins with variable number of operands. */
33499 for (i = 0, d = bdesc_args;
33500 i < ARRAY_SIZE (bdesc_args);
33506 ftype = (enum ix86_builtin_func_type) d->flag;
33507 def_builtin_const (d->mask, d->name, ftype, d->code);
33510 /* Add all builtins with rounding. */
33511 for (i = 0, d = bdesc_round_args;
33512 i < ARRAY_SIZE (bdesc_round_args);
33518 ftype = (enum ix86_builtin_func_type) d->flag;
33519 def_builtin_const (d->mask, d->name, ftype, d->code);
33522 /* pcmpestr[im] insns. */
33523 for (i = 0, d = bdesc_pcmpestr;
33524 i < ARRAY_SIZE (bdesc_pcmpestr);
33527 if (d->code == IX86_BUILTIN_PCMPESTRM128)
33528 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33530 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33531 def_builtin_const (d->mask, d->name, ftype, d->code);
33534 /* pcmpistr[im] insns. */
33535 for (i = 0, d = bdesc_pcmpistr;
33536 i < ARRAY_SIZE (bdesc_pcmpistr);
33539 if (d->code == IX86_BUILTIN_PCMPISTRM128)
33540 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33542 ftype = INT_FTYPE_V16QI_V16QI_INT;
33543 def_builtin_const (d->mask, d->name, ftype, d->code);
33546 /* comi/ucomi insns. */
33547 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33549 if (d->mask == OPTION_MASK_ISA_SSE2)
33550 ftype = INT_FTYPE_V2DF_V2DF;
33552 ftype = INT_FTYPE_V4SF_V4SF;
33553 def_builtin_const (d->mask, d->name, ftype, d->code);
33557 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33558 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33559 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33560 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
33562 /* SSE or 3DNow!A */
33563 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33564 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33565 IX86_BUILTIN_MASKMOVQ);
33568 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33569 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33571 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33572 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33573 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33574 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33577 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33578 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33579 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33580 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
33583 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33584 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33585 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33586 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33587 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33588 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33589 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33590 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33591 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33592 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33593 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33594 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33597 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33598 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33601 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33602 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33603 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33604 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33605 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33606 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33607 IX86_BUILTIN_RDRAND64_STEP);
33610 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33611 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33612 IX86_BUILTIN_GATHERSIV2DF);
33614 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33615 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33616 IX86_BUILTIN_GATHERSIV4DF);
33618 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33619 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33620 IX86_BUILTIN_GATHERDIV2DF);
33622 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33623 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33624 IX86_BUILTIN_GATHERDIV4DF);
33626 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33627 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33628 IX86_BUILTIN_GATHERSIV4SF);
33630 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33631 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33632 IX86_BUILTIN_GATHERSIV8SF);
33634 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33635 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33636 IX86_BUILTIN_GATHERDIV4SF);
33638 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33639 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33640 IX86_BUILTIN_GATHERDIV8SF);
33642 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33643 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33644 IX86_BUILTIN_GATHERSIV2DI);
33646 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33647 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33648 IX86_BUILTIN_GATHERSIV4DI);
33650 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33651 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33652 IX86_BUILTIN_GATHERDIV2DI);
33654 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33655 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33656 IX86_BUILTIN_GATHERDIV4DI);
33658 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33659 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33660 IX86_BUILTIN_GATHERSIV4SI);
33662 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33663 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33664 IX86_BUILTIN_GATHERSIV8SI);
33666 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33667 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33668 IX86_BUILTIN_GATHERDIV4SI);
33670 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
33671 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
33672 IX86_BUILTIN_GATHERDIV8SI);
33674 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
33675 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
33676 IX86_BUILTIN_GATHERALTSIV4DF);
33678 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
33679 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
33680 IX86_BUILTIN_GATHERALTDIV8SF);
33682 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
33683 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
33684 IX86_BUILTIN_GATHERALTSIV4DI);
33686 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
33687 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
33688 IX86_BUILTIN_GATHERALTDIV8SI);
33691 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
33692 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
33693 IX86_BUILTIN_GATHER3SIV16SF);
33695 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
33696 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
33697 IX86_BUILTIN_GATHER3SIV8DF);
33699 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
33700 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
33701 IX86_BUILTIN_GATHER3DIV16SF);
33703 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
33704 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
33705 IX86_BUILTIN_GATHER3DIV8DF);
33707 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
33708 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
33709 IX86_BUILTIN_GATHER3SIV16SI);
33711 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
33712 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
33713 IX86_BUILTIN_GATHER3SIV8DI);
33715 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
33716 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
33717 IX86_BUILTIN_GATHER3DIV16SI);
33719 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
33720 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
33721 IX86_BUILTIN_GATHER3DIV8DI);
33723 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
33724 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
33725 IX86_BUILTIN_GATHER3ALTSIV8DF);
33727 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
33728 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
33729 IX86_BUILTIN_GATHER3ALTDIV16SF);
33731 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
33732 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
33733 IX86_BUILTIN_GATHER3ALTSIV8DI);
33735 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
33736 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
33737 IX86_BUILTIN_GATHER3ALTDIV16SI);
33739 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
33740 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
33741 IX86_BUILTIN_SCATTERSIV16SF);
33743 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
33744 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
33745 IX86_BUILTIN_SCATTERSIV8DF);
33747 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
33748 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
33749 IX86_BUILTIN_SCATTERDIV16SF);
33751 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
33752 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
33753 IX86_BUILTIN_SCATTERDIV8DF);
33755 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
33756 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
33757 IX86_BUILTIN_SCATTERSIV16SI);
33759 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
33760 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
33761 IX86_BUILTIN_SCATTERSIV8DI);
33763 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
33764 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
33765 IX86_BUILTIN_SCATTERDIV16SI);
33767 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
33768 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
33769 IX86_BUILTIN_SCATTERDIV8DI);
33772 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
33773 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
33774 IX86_BUILTIN_GATHER3SIV2DF);
33776 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
33777 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
33778 IX86_BUILTIN_GATHER3SIV4DF);
33780 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
33781 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
33782 IX86_BUILTIN_GATHER3DIV2DF);
33784 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
33785 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
33786 IX86_BUILTIN_GATHER3DIV4DF);
33788 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
33789 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
33790 IX86_BUILTIN_GATHER3SIV4SF);
33792 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
33793 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
33794 IX86_BUILTIN_GATHER3SIV8SF);
33796 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
33797 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
33798 IX86_BUILTIN_GATHER3DIV4SF);
33800 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
33801 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
33802 IX86_BUILTIN_GATHER3DIV8SF);
33804 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
33805 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
33806 IX86_BUILTIN_GATHER3SIV2DI);
33808 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
33809 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
33810 IX86_BUILTIN_GATHER3SIV4DI);
33812 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
33813 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
33814 IX86_BUILTIN_GATHER3DIV2DI);
33816 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
33817 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
33818 IX86_BUILTIN_GATHER3DIV4DI);
33820 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
33821 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
33822 IX86_BUILTIN_GATHER3SIV4SI);
33824 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
33825 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
33826 IX86_BUILTIN_GATHER3SIV8SI);
33828 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
33829 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
33830 IX86_BUILTIN_GATHER3DIV4SI);
33832 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
33833 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
33834 IX86_BUILTIN_GATHER3DIV8SI);
33836 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
33837 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
33838 IX86_BUILTIN_GATHER3ALTSIV4DF);
33840 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
33841 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
33842 IX86_BUILTIN_GATHER3ALTDIV8SF);
33844 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
33845 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
33846 IX86_BUILTIN_GATHER3ALTSIV4DI);
33848 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
33849 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
33850 IX86_BUILTIN_GATHER3ALTDIV8SI);
33852 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
33853 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
33854 IX86_BUILTIN_SCATTERSIV8SF);
33856 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
33857 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
33858 IX86_BUILTIN_SCATTERSIV4SF);
33860 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
33861 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
33862 IX86_BUILTIN_SCATTERSIV4DF);
33864 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
33865 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
33866 IX86_BUILTIN_SCATTERSIV2DF);
33868 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
33869 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
33870 IX86_BUILTIN_SCATTERDIV8SF);
33872 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
33873 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
33874 IX86_BUILTIN_SCATTERDIV4SF);
33876 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
33877 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
33878 IX86_BUILTIN_SCATTERDIV4DF);
33880 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
33881 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
33882 IX86_BUILTIN_SCATTERDIV2DF);
33884 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
33885 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
33886 IX86_BUILTIN_SCATTERSIV8SI);
33888 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
33889 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
33890 IX86_BUILTIN_SCATTERSIV4SI);
33892 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
33893 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
33894 IX86_BUILTIN_SCATTERSIV4DI);
33896 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
33897 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
33898 IX86_BUILTIN_SCATTERSIV2DI);
33900 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
33901 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
33902 IX86_BUILTIN_SCATTERDIV8SI);
33904 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
33905 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
33906 IX86_BUILTIN_SCATTERDIV4SI);
33908 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
33909 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
33910 IX86_BUILTIN_SCATTERDIV4DI);
33912 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
33913 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
33914 IX86_BUILTIN_SCATTERDIV2DI);
33917 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
33918 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33919 IX86_BUILTIN_GATHERPFDPD);
33920 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
33921 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33922 IX86_BUILTIN_GATHERPFDPS);
33923 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
33924 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33925 IX86_BUILTIN_GATHERPFQPD);
33926 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
33927 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33928 IX86_BUILTIN_GATHERPFQPS);
33929 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
33930 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33931 IX86_BUILTIN_SCATTERPFDPD);
33932 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
33933 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33934 IX86_BUILTIN_SCATTERPFDPS);
33935 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
33936 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33937 IX86_BUILTIN_SCATTERPFQPD);
33938 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
33939 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33940 IX86_BUILTIN_SCATTERPFQPS);
33943 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
33944 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
33945 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
33946 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
33947 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
33948 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
33949 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
33950 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
33951 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
33952 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
33953 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
33954 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
33955 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
33956 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
33959 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
33960 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
33962 /* MMX access to the vec_init patterns. */
33963 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
33964 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
33966 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
33967 V4HI_FTYPE_HI_HI_HI_HI,
33968 IX86_BUILTIN_VEC_INIT_V4HI);
33970 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
33971 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
33972 IX86_BUILTIN_VEC_INIT_V8QI);
33974 /* Access to the vec_extract patterns. */
33975 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
33976 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
33977 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
33978 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
33979 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
33980 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
33981 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
33982 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
33983 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
33984 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
33986 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33987 "__builtin_ia32_vec_ext_v4hi",
33988 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
33990 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
33991 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
33993 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
33994 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
33996 /* Access to the vec_set patterns. */
33997 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
33998 "__builtin_ia32_vec_set_v2di",
33999 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
34001 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
34002 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
34004 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
34005 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
34007 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
34008 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
34010 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34011 "__builtin_ia32_vec_set_v4hi",
34012 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
34014 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
34015 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
34018 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
34019 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
34020 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
34021 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
34022 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
34023 "__builtin_ia32_rdseed_di_step",
34024 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
34027 def_builtin (0, "__builtin_ia32_addcarryx_u32",
34028 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
34029 def_builtin (OPTION_MASK_ISA_64BIT,
34030 "__builtin_ia32_addcarryx_u64",
34031 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34032 IX86_BUILTIN_ADDCARRYX64);
34035 def_builtin (0, "__builtin_ia32_sbb_u32",
34036 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
34037 def_builtin (OPTION_MASK_ISA_64BIT,
34038 "__builtin_ia32_sbb_u64",
34039 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34040 IX86_BUILTIN_SBB64);
34042 /* Read/write FLAGS. */
34043 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
34044 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34045 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
34046 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34047 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
34048 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
34049 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
34050 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
34053 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
34054 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
34057 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
34058 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
34060 /* Add FMA4 multi-arg argument instructions */
34061 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
34066 ftype = (enum ix86_builtin_func_type) d->flag;
34067 def_builtin_const (d->mask, d->name, ftype, d->code);
/* Register the MPX (Intel Memory Protection Extensions) builtins from the
   bdesc_mpx and bdesc_mpx_const tables.
   NOTE(review): this chunk is a lossy extraction -- the leading "340xx"
   numbers are original line numbers and several lines (braces, loop
   increments, the enclosing "static void") are elided.  Code below is
   kept byte-identical to the extraction.  */
34072 ix86_init_mpx_builtins ()
34074 const struct builtin_description * d;
34075 enum ix86_builtin_func_type ftype;
/* First table: plain (non-const) MPX builtins.  */
34079 for (i = 0, d = bdesc_mpx;
34080 i < ARRAY_SIZE (bdesc_mpx);
34086 ftype = (enum ix86_builtin_func_type) d->flag;
34087 decl = def_builtin (d->mask, d->name, ftype, d->code);
34089 /* With no leaf and nothrow flags for MPX builtins
34090 abnormal edges may follow its call when setjmp
34091 presents in the function. Since we may have a lot
34092 of MPX builtins calls it causes lots of useless
34093 edges and enormous PHI nodes. To avoid this we mark
34094 MPX builtins as leaf and nothrow. */
34097 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34099 TREE_NOTHROW (decl) = 1;
/* NOTE(review): presumably an elided else-branch handles decl == NULL
   (builtin deferred until its ISA is enabled) by recording leaf/nothrow
   in the isa table below -- confirm against full source.  */
34103 ix86_builtins_isa[(int)d->code].leaf_p = true;
34104 ix86_builtins_isa[(int)d->code].nothrow_p = true;
/* Second table: const (pure-value) MPX builtins; same leaf/nothrow
   marking as above.  */
34108 for (i = 0, d = bdesc_mpx_const;
34109 i < ARRAY_SIZE (bdesc_mpx_const);
34115 ftype = (enum ix86_builtin_func_type) d->flag;
34116 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
34120 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34122 TREE_NOTHROW (decl) = 1;
34126 ix86_builtins_isa[(int)d->code].leaf_p = true;
34127 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34132 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
34133 to return a pointer to VERSION_DECL if the outcome of the expression
34134 formed by PREDICATE_CHAIN is true. This function will be called during
34135 version dispatch to decide which function version to execute. It returns
34136 the basic block at the end, to which more conditions can be added. */
/* Append to NEW_BB: "if (all PREDICATE_CHAIN calls return non-zero)
   return &VERSION_DECL;".  Splits NEW_BB so further conditions can be
   chained onto the returned (fall-through) block.
   NOTE(review): lossy extraction -- braces, some declarations (e.g. the
   gimple_seq and edge variables) and the final return are elided; code
   kept byte-identical.  */
34139 add_condition_to_bb (tree function_decl, tree version_decl,
34140 tree predicate_chain, basic_block new_bb)
34142 gimple return_stmt;
34143 tree convert_expr, result_var;
34144 gimple convert_stmt;
34145 gimple call_cond_stmt;
34146 gimple if_else_stmt;
34148 basic_block bb1, bb2, bb3;
34151 tree cond_var, and_expr_var = NULL_TREE;
34154 tree predicate_decl, predicate_arg;
34156 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
34158 gcc_assert (new_bb != NULL);
34159 gseq = bb_seq (new_bb);
/* Build "result_var = (void *) &version_decl; return result_var;".  */
34162 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
34163 build_fold_addr_expr (version_decl));
34164 result_var = create_tmp_var (ptr_type_node);
34165 convert_stmt = gimple_build_assign (result_var, convert_expr);
34166 return_stmt = gimple_build_return (result_var);
/* No predicates: this is the unconditional (default) version -- emit the
   return directly and (in the elided tail) exit early.  */
34168 if (predicate_chain == NULL_TREE)
34170 gimple_seq_add_stmt (&gseq, convert_stmt);
34171 gimple_seq_add_stmt (&gseq, return_stmt);
34172 set_bb_seq (new_bb, gseq);
34173 gimple_set_bb (convert_stmt, new_bb);
34174 gimple_set_bb (return_stmt, new_bb);
/* Emit one __builtin_cpu_is/_supports call per predicate and AND the
   integer results together via MIN_EXPR.  */
34179 while (predicate_chain != NULL)
34181 cond_var = create_tmp_var (integer_type_node);
34182 predicate_decl = TREE_PURPOSE (predicate_chain);
34183 predicate_arg = TREE_VALUE (predicate_chain);
34184 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
34185 gimple_call_set_lhs (call_cond_stmt, cond_var);
34187 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
34188 gimple_set_bb (call_cond_stmt, new_bb);
34189 gimple_seq_add_stmt (&gseq, call_cond_stmt);
34191 predicate_chain = TREE_CHAIN (predicate_chain);
34193 if (and_expr_var == NULL)
34194 and_expr_var = cond_var;
34197 gimple assign_stmt;
34198 /* Use MIN_EXPR to check if any integer is zero?.
34199 and_expr_var = min_expr <cond_var, and_expr_var> */
34200 assign_stmt = gimple_build_assign (and_expr_var,
34201 build2 (MIN_EXPR, integer_type_node,
34202 cond_var, and_expr_var));
34204 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
34205 gimple_set_bb (assign_stmt, new_bb);
34206 gimple_seq_add_stmt (&gseq, assign_stmt);
/* "if (and_expr_var > 0)" guards the return of this version.  */
34210 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
34212 NULL_TREE, NULL_TREE);
34213 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34214 gimple_set_bb (if_else_stmt, new_bb);
34215 gimple_seq_add_stmt (&gseq, if_else_stmt);
34217 gimple_seq_add_stmt (&gseq, convert_stmt);
34218 gimple_seq_add_stmt (&gseq, return_stmt);
34219 set_bb_seq (new_bb, gseq);
/* Split after the condition (bb1 -> true edge) and after the return
   (bb2), giving bb3 as the false/fall-through successor for the next
   condition.  NOTE(review): assignments of bb1/bb2/bb3 from the split
   edges are among the elided lines.  */
34222 e12 = split_block (bb1, if_else_stmt);
34224 e12->flags &= ~EDGE_FALLTHRU;
34225 e12->flags |= EDGE_TRUE_VALUE;
34227 e23 = split_block (bb2, return_stmt);
34229 gimple_set_bb (convert_stmt, bb2);
34230 gimple_set_bb (return_stmt, bb2);
34233 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
34236 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34243 /* This parses the attribute arguments to target in DECL and determines
34244 the right builtin to use to match the platform specification.
34245 It returns the priority value for this version decl. If PREDICATE_LIST
34246 is not NULL, it stores the list of cpu features that need to be checked
34247 before dispatching this function. */
/* Parse DECL's "target" attribute and compute its dispatch priority;
   optionally (when PREDICATE_LIST is non-NULL) build the chain of
   __builtin_cpu_is / __builtin_cpu_supports predicates to test before
   dispatching to this version.
   NOTE(review): lossy extraction -- enum body, several feature-table
   entries, switch "break"s and closing braces are elided; code kept
   byte-identical.  */
34249 static unsigned int
34250 get_builtin_code_for_version (tree decl, tree *predicate_list)
34253 struct cl_target_option cur_target;
34255 struct cl_target_option *new_target;
34256 const char *arg_str = NULL;
34257 const char *attrs_str = NULL;
34258 char *tok_str = NULL;
34261 /* Priority of i386 features, greater value is higher priority. This is
34262 used to decide the order in which function dispatch must happen. For
34263 instance, a version specialized for SSE4.2 should be checked for dispatch
34264 before a version for SSE3, as SSE4.2 implies SSE3. */
34265 enum feature_priority
34293 enum feature_priority priority = P_ZERO;
34295 /* These are the target attribute strings for which a dispatcher is
34296 available, from fold_builtin_cpu. */
34298 static struct _feature_list
34300 const char *const name;
34301 const enum feature_priority priority;
34303 const feature_list[] =
34309 {"sse4a", P_SSE4_A},
34310 {"ssse3", P_SSSE3},
34311 {"sse4.1", P_SSE4_1},
34312 {"sse4.2", P_SSE4_2},
34313 {"popcnt", P_POPCNT},
34319 {"avx512f", P_AVX512F}
34323 static unsigned int NUM_FEATURES
34324 = sizeof (feature_list) / sizeof (struct _feature_list);
34328 tree predicate_chain = NULL_TREE;
34329 tree predicate_decl, predicate_arg;
34331 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34332 gcc_assert (attrs != NULL);
34334 attrs = TREE_VALUE (TREE_VALUE (attrs));
34336 gcc_assert (TREE_CODE (attrs) == STRING_CST);
34337 attrs_str = TREE_STRING_POINTER (attrs);
34339 /* Return priority zero for default function. */
34340 if (strcmp (attrs_str, "default") == 0)
34343 /* Handle arch= if specified. For priority, set it to be 1 more than
34344 the best instruction set the processor can handle. For instance, if
34345 there is a version for atom and a version for ssse3 (the highest ISA
34346 priority for atom), the atom version must be checked for dispatch
34347 before the ssse3 version. */
34348 if (strstr (attrs_str, "arch=") != NULL)
34350 cl_target_option_save (&cur_target, &global_options);
34351 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34352 &global_options_set);
34354 gcc_assert (target_node);
34355 new_target = TREE_TARGET_OPTION (target_node);
34356 gcc_assert (new_target);
/* Map each known arch to the string __builtin_cpu_is understands and to
   the processor's dispatch priority.  NOTE(review): per-case "break"
   statements and some arg_str assignments are among the elided lines.  */
34358 if (new_target->arch_specified && new_target->arch > 0)
34360 switch (new_target->arch)
34362 case PROCESSOR_CORE2:
34364 priority = P_PROC_SSSE3;
34366 case PROCESSOR_NEHALEM:
34367 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34368 arg_str = "westmere";
34370 /* We translate "arch=corei7" and "arch=nehalem" to
34371 "corei7" so that it will be mapped to M_INTEL_COREI7
34372 as cpu type to cover all M_INTEL_COREI7_XXXs. */
34373 arg_str = "corei7";
34374 priority = P_PROC_SSE4_2;
34376 case PROCESSOR_SANDYBRIDGE:
34377 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34378 arg_str = "ivybridge";
34380 arg_str = "sandybridge";
34381 priority = P_PROC_AVX;
34383 case PROCESSOR_HASWELL:
34384 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34385 arg_str = "broadwell";
34387 arg_str = "haswell";
34388 priority = P_PROC_AVX2;
34390 case PROCESSOR_BONNELL:
34391 arg_str = "bonnell";
34392 priority = P_PROC_SSSE3;
34394 case PROCESSOR_KNL:
34396 priority = P_PROC_AVX512F;
34398 case PROCESSOR_SILVERMONT:
34399 arg_str = "silvermont";
34400 priority = P_PROC_SSE4_2;
34402 case PROCESSOR_AMDFAM10:
34403 arg_str = "amdfam10h";
34404 priority = P_PROC_SSE4_A;
34406 case PROCESSOR_BTVER1:
34407 arg_str = "btver1";
34408 priority = P_PROC_SSE4_A;
34410 case PROCESSOR_BTVER2:
34411 arg_str = "btver2";
34412 priority = P_PROC_AVX;
34414 case PROCESSOR_BDVER1:
34415 arg_str = "bdver1";
34416 priority = P_PROC_XOP;
34418 case PROCESSOR_BDVER2:
34419 arg_str = "bdver2";
34420 priority = P_PROC_FMA;
34422 case PROCESSOR_BDVER3:
34423 arg_str = "bdver3";
34424 priority = P_PROC_FMA;
34426 case PROCESSOR_BDVER4:
34427 arg_str = "bdver4";
34428 priority = P_PROC_AVX2;
34433 cl_target_option_restore (&global_options, &cur_target);
34435 if (predicate_list && arg_str == NULL)
34437 error_at (DECL_SOURCE_LOCATION (decl),
34438 "No dispatcher found for the versioning attributes");
/* arch= predicates use __builtin_cpu_is.  */
34442 if (predicate_list)
34444 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
34445 /* For a C string literal the length includes the trailing NULL. */
34446 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34447 predicate_chain = tree_cons (predicate_decl, predicate_arg,
/* Tokenize the attribute string on ',' and translate each recognized
   feature into a __builtin_cpu_supports predicate; the overall priority
   is the maximum over all features.  */
34452 /* Process feature name. */
34453 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34454 strcpy (tok_str, attrs_str);
34455 token = strtok (tok_str, ",");
34456 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34458 while (token != NULL)
34460 /* Do not process "arch=" */
34461 if (strncmp (token, "arch=", 5) == 0)
34463 token = strtok (NULL, ",");
34466 for (i = 0; i < NUM_FEATURES; ++i)
34468 if (strcmp (token, feature_list[i].name) == 0)
34470 if (predicate_list)
34472 predicate_arg = build_string_literal (
34473 strlen (feature_list[i].name) + 1,
34474 feature_list[i].name);
34475 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34478 /* Find the maximum priority feature. */
34479 if (feature_list[i].priority > priority)
34480 priority = feature_list[i].priority;
/* i == NUM_FEATURES means the token matched nothing in the table.  */
34485 if (predicate_list && i == NUM_FEATURES)
34487 error_at (DECL_SOURCE_LOCATION (decl),
34488 "No dispatcher found for %s", token);
34491 token = strtok (NULL, ",");
34495 if (predicate_list && predicate_chain == NULL_TREE)
34497 error_at (DECL_SOURCE_LOCATION (decl),
34498 "No dispatcher found for the versioning attributes : %s",
/* Predicates were consed in reverse; restore source order for the
   caller.  */
34502 else if (predicate_list)
34504 predicate_chain = nreverse (predicate_chain);
34505 *predicate_list = predicate_chain;
34511 /* This compares the priority of target features in function DECL1
34512 and DECL2. It returns positive value if DECL1 is higher priority,
34513 negative value if DECL2 is higher priority and 0 if they are the
34517 ix86_compare_version_priority (tree decl1, tree decl2)
34519 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34520 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
34522 return (int)priority1 - (int)priority2;
34525 /* V1 and V2 point to function versions with different priorities
34526 based on the target ISA. This function compares their priorities. */
34529 feature_compare (const void *v1, const void *v2)
34531 typedef struct _function_version_info
34534 tree predicate_chain;
34535 unsigned int dispatch_priority;
34536 } function_version_info;
34538 const function_version_info c1 = *(const function_version_info *)v1;
34539 const function_version_info c2 = *(const function_version_info *)v2;
34540 return (c2.dispatch_priority - c1.dispatch_priority);
34543 /* This function generates the dispatch function for
34544 multi-versioned functions. DISPATCH_DECL is the function which will
34545 contain the dispatch logic. FNDECLS are the function choices for
34546 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
34547 in DISPATCH_DECL in which the dispatch code is generated. */
/* Build the body of the ifunc resolver DISPATCH_DECL: call
   __builtin_cpu_init, then test each version in FNDECLS (highest
   dispatch priority first) and return the matching function pointer,
   falling back to the default version.
   NOTE(review): lossy extraction -- braces, a few declarations
   (gimple_seq, loop indices, default_decl) and trailing arguments of
   some calls are elided; code kept byte-identical.  */
34550 dispatch_function_versions (tree dispatch_decl,
34552 basic_block *empty_bb)
34555 gimple ifunc_cpu_init_stmt;
34559 vec<tree> *fndecls;
34560 unsigned int num_versions = 0;
34561 unsigned int actual_versions = 0;
/* Per-version record sorted by feature_compare below; the struct layout
   must stay in sync with the one declared inside feature_compare.  */
34564 struct _function_version_info
34567 tree predicate_chain;
34568 unsigned int dispatch_priority;
34569 }*function_version_info;
34571 gcc_assert (dispatch_decl != NULL
34572 && fndecls_p != NULL
34573 && empty_bb != NULL);
34575 /*fndecls_p is actually a vector. */
34576 fndecls = static_cast<vec<tree> *> (fndecls_p);
34578 /* At least one more version other than the default. */
34579 num_versions = fndecls->length ();
34580 gcc_assert (num_versions >= 2);
34582 function_version_info = (struct _function_version_info *)
34583 XNEWVEC (struct _function_version_info, (num_versions - 1));
34585 /* The first version in the vector is the default decl. */
34586 default_decl = (*fndecls)[0];
34588 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34590 gseq = bb_seq (*empty_bb);
34591 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
34592 constructors, so explicity call __builtin_cpu_init here. */
34593 ifunc_cpu_init_stmt = gimple_build_call_vec (
34594 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34595 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34596 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34597 set_bb_seq (*empty_bb, gseq);
/* Collect priority + predicate chain for every non-default version
   (index 0 is the default and is handled last).  */
34602 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34604 tree version_decl = ele;
34605 tree predicate_chain = NULL_TREE;
34606 unsigned int priority;
34607 /* Get attribute string, parse it and find the right predicate decl.
34608 The predicate function could be a lengthy combination of many
34609 features, like arch-type and various isa-variants. */
34610 priority = get_builtin_code_for_version (version_decl,
/* An empty predicate chain means this is the default version; skip it
   here (presumably via an elided "continue") -- it is dispatched last.  */
34613 if (predicate_chain == NULL_TREE)
34616 function_version_info [actual_versions].version_decl = version_decl;
34617 function_version_info [actual_versions].predicate_chain
34619 function_version_info [actual_versions].dispatch_priority = priority;
34623 /* Sort the versions according to descending order of dispatch priority. The
34624 priority is based on the ISA. This is not a perfect solution. There
34625 could still be ambiguity. If more than one function version is suitable
34626 to execute, which one should be dispatched? In future, allow the user
34627 to specify a dispatch priority next to the version. */
34628 qsort (function_version_info, actual_versions,
34629 sizeof (struct _function_version_info), feature_compare);
34631 for (i = 0; i < actual_versions; ++i)
34632 *empty_bb = add_condition_to_bb (dispatch_decl,
34633 function_version_info[i].version_decl,
34634 function_version_info[i].predicate_chain,
34637 /* dispatch default version at the end. */
34638 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34641 free (function_version_info);
34645 /* Comparator function to be used in qsort routine to sort attribute
34646 specification strings to "target". */
/* qsort comparator for an array of attribute strings (char *): compares
   the pointed-to strings with strcmp so the "target" attribute tokens
   can be sorted canonically.
   (Reconstructed: the extraction dropped the return-type line and the
   surrounding braces.)  */
static int
attr_strcmp (const void *v1, const void *v2)
{
  /* Each element of the sorted array is a char *, so V1/V2 point to
     pointers; dereference once to reach the strings.  */
  const char *c1 = *(char *const*)v1;
  const char *c2 = *(char *const*)v2;
  return strcmp (c1, c2);
}
34656 /* ARGLIST is the argument to target attribute. This function tokenizes
34657 the comma separated arguments, sorts them and returns a string which
34658 is a unique identifier for the comma separated arguments. It also
34659 replaces non-identifier characters "=,-" with "_". */
/* Build a canonical identifier from the "target" attribute ARGLIST:
   concatenate the string arguments, replace '=', ',', '-' with '_',
   sort the comma-separated tokens, and join them with '_'.  Caller
   frees the returned XNEWVEC buffer.
   NOTE(review): lossy extraction -- braces, the argnum-counting loop
   body, the str_len_sum reset before each copy pass, and the final
   return/free of args are elided; code kept byte-identical.  */
34662 sorted_attr_string (tree arglist)
34665 size_t str_len_sum = 0;
34666 char **args = NULL;
34667 char *attr_str, *ret_str;
34669 unsigned int argnum = 1;
/* First pass: total length; count ','-separated tokens across all
   attribute arguments (elided lines increment argnum per ',').  */
34672 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34674 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34675 size_t len = strlen (str);
34676 str_len_sum += len + 1;
34677 if (arg != arglist)
34679 for (i = 0; i < strlen (str); i++)
34684 attr_str = XNEWVEC (char, str_len_sum);
/* Second pass: copy arguments into attr_str, ','-joined.
   NOTE(review): str_len_sum is presumably reset to 0 before this loop
   (elided) -- it is reused here as a write offset.  */
34686 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34688 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34689 size_t len = strlen (str);
34690 memcpy (attr_str + str_len_sum, str, len);
34691 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
34692 str_len_sum += len + 1;
34695 /* Replace "=,-" with "_". */
34696 for (i = 0; i < strlen (attr_str); i++)
34697 if (attr_str[i] == '=' || attr_str[i]== '-')
/* Tokenize, sort the tokens, and join them with '_' into ret_str.  */
34703 args = XNEWVEC (char *, argnum);
34706 attr = strtok (attr_str, ",");
34707 while (attr != NULL)
34711 attr = strtok (NULL, ",");
34714 qsort (args, argnum, sizeof (char *), attr_strcmp);
34716 ret_str = XNEWVEC (char, str_len_sum);
34718 for (i = 0; i < argnum; i++)
34720 size_t len = strlen (args[i]);
34721 memcpy (ret_str + str_len_sum, args[i], len);
34722 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
34723 str_len_sum += len + 1;
34727 XDELETEVEC (attr_str);
34731 /* This function changes the assembler name for functions that are
34732 versions. If DECL is a function version and has a "target"
34733 attribute, it appends the attribute string to its assembler name. */
/* Mangle the assembler name ID of function version DECL by appending
   "." + its sorted target-attribute string; the "default" version keeps
   ID unchanged.
   NOTE(review): lossy extraction -- the tree version_attr declaration,
   the early "return id" for the default case, and the final "return
   ret" are elided; code kept byte-identical.  */
34736 ix86_mangle_function_version_assembler_name (tree decl, tree id)
34739 const char *orig_name, *version_string;
34740 char *attr_str, *assembler_name;
/* gnu_inline versions have no out-of-line body, so they cannot be
   dispatched -- diagnose.  */
34742 if (DECL_DECLARED_INLINE_P (decl)
34743 && lookup_attribute ("gnu_inline",
34744 DECL_ATTRIBUTES (decl)))
34745 error_at (DECL_SOURCE_LOCATION (decl),
34746 "Function versions cannot be marked as gnu_inline,"
34747 " bodies have to be generated");
34749 if (DECL_VIRTUAL_P (decl)
34750 || DECL_VINDEX (decl))
34751 sorry ("Virtual function multiversioning not supported");
34753 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34755 /* target attribute string cannot be NULL. */
34756 gcc_assert (version_attr != NULL_TREE);
34758 orig_name = IDENTIFIER_POINTER (id);
34760 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
/* The default version keeps its original assembler name (elided early
   return).  */
34762 if (strcmp (version_string, "default") == 0)
/* "<orig>.<sorted-attrs>": +2 for the '.' and the NUL.  */
34765 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
34766 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
34768 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
34770 /* Allow assembler name to be modified if already set. */
34771 if (DECL_ASSEMBLER_NAME_SET_P (decl))
34772 SET_DECL_RTL (decl, NULL);
34774 tree ret = get_identifier (assembler_name);
34775 XDELETEVEC (attr_str);
34776 XDELETEVEC (assembler_name);
34780 /* This function returns true if FN1 and FN2 are versions of the same function,
34781 that is, the target strings of the function decls are different. This assumes
34782 that FN1 and FN2 have the same signature. */
/* TARGET_OPTION_FUNCTION_VERSIONS hook: return true if FN1 and FN2 are
   distinct versions of the same function (their sorted "target"
   attribute strings differ).  Assumes matching signatures.
   NOTE(review): lossy extraction -- attr1/attr2 declarations, the
   result variable, several return statements and braces are elided;
   code kept byte-identical.  */
34785 ix86_function_versions (tree fn1, tree fn2)
34788 char *target1, *target2;
34791 if (TREE_CODE (fn1) != FUNCTION_DECL
34792 || TREE_CODE (fn2) != FUNCTION_DECL)
34795 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
34796 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
34798 /* At least one function decl should have the target attribute specified. */
34799 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
34802 /* Diagnose missing target attribute if one of the decls is already
34803 multi-versioned. */
34804 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
34806 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
/* Presumably an elided swap makes fn1/attr1 the attributed decl when
   attr2 is the non-NULL one -- confirm against full source.  */
34808 if (attr2 != NULL_TREE)
34815 error_at (DECL_SOURCE_LOCATION (fn2),
34816 "missing %<target%> attribute for multi-versioned %D",
34818 inform (DECL_SOURCE_LOCATION (fn1),
34819 "previous declaration of %D", fn1);
34820 /* Prevent diagnosing of the same error multiple times. */
34821 DECL_ATTRIBUTES (fn2)
34822 = tree_cons (get_identifier ("target"),
34823 copy_node (TREE_VALUE (attr1)),
34824 DECL_ATTRIBUTES (fn2));
/* Canonical comparison: sorted attribute strings must differ for the
   decls to be distinct versions.  */
34829 target1 = sorted_attr_string (TREE_VALUE (attr1));
34830 target2 = sorted_attr_string (TREE_VALUE (attr2));
34832 /* The sorted target strings must be different for fn1 and fn2
34834 if (strcmp (target1, target2) == 0)
34839 XDELETEVEC (target1);
34840 XDELETEVEC (target2);
34846 ix86_mangle_decl_assembler_name (tree decl, tree id)
34848 /* For function version, add the target suffix to the assembler name. */
34849 if (TREE_CODE (decl) == FUNCTION_DECL
34850 && DECL_FUNCTION_VERSIONED (decl))
34851 id = ix86_mangle_function_version_assembler_name (decl, id);
34852 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
34853 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
34859 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
34860 is true, append the full path name of the source file. */
34863 make_name (tree decl, const char *suffix, bool make_unique)
34865 char *global_var_name;
34868 const char *unique_name = NULL;
34870 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
34872 /* Get a unique name that can be used globally without any chances
34873 of collision at link time. */
34875 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
34877 name_len = strlen (name) + strlen (suffix) + 2;
34880 name_len += strlen (unique_name) + 1;
34881 global_var_name = XNEWVEC (char, name_len);
34883 /* Use '.' to concatenate names as it is demangler friendly. */
34885 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
34888 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
34890 return global_var_name;
34893 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
34895 /* Make a dispatcher declaration for the multi-versioned function DECL.
34896 Calls to DECL function will be replaced with calls to the dispatcher
34897 by the front-end. Return the decl created. */
/* Create the (external, ifunc) dispatcher FUNCTION_DECL that callers of
   the multi-versioned DECL will be redirected to; the resolver later
   flips DECL_EXTERNAL when it is emitted.
   NOTE(review): lossy extraction -- func_name/func_decl declarations,
   braces, and the final "return func_decl" are elided; code kept
   byte-identical.  */
34900 make_dispatcher_decl (const tree decl)
34904 tree fn_type, func_type;
34905 bool is_uniq = false;
/* Non-public versions need a file-unique dispatcher name (presumably an
   elided "is_uniq = true" here).  */
34907 if (TREE_PUBLIC (decl) == 0)
34910 func_name = make_name (decl, "ifunc", is_uniq);
/* Dispatcher shares DECL's exact function type.  */
34912 fn_type = TREE_TYPE (decl);
34913 func_type = build_function_type (TREE_TYPE (fn_type),
34914 TYPE_ARG_TYPES (fn_type));
34916 func_decl = build_fn_decl (func_name, func_type);
34917 XDELETEVEC (func_name);
34918 TREE_USED (func_decl) = 1;
34919 DECL_CONTEXT (func_decl) = NULL_TREE;
34920 DECL_INITIAL (func_decl) = error_mark_node;
34921 DECL_ARTIFICIAL (func_decl) = 1;
34922 /* Mark this func as external, the resolver will flip it again if
34923 it gets generated. */
34924 DECL_EXTERNAL (func_decl) = 1;
34925 /* This will be of type IFUNCs have to be externally visible. */
34926 TREE_PUBLIC (func_decl) = 1;
34933 /* Returns true if decl is multi-versioned and DECL is the default function,
34934 that is it is not tagged with target specific optimization. */
34937 is_function_default_version (const tree decl)
34939 if (TREE_CODE (decl) != FUNCTION_DECL
34940 || !DECL_FUNCTION_VERSIONED (decl))
34942 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34944 attr = TREE_VALUE (TREE_VALUE (attr));
34945 return (TREE_CODE (attr) == STRING_CST
34946 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
34949 /* Make a dispatcher declaration for the multi-versioned function DECL.
34950 Calls to DECL function will be replaced with calls to the dispatcher
34951 by the front-end. Returns the decl of the dispatcher function. */
/* TARGET_GET_FUNCTION_VERSIONS_DISPATCHER hook: find or create the ifunc
   dispatcher decl for versioned function DECL.  Moves the default
   version to the head of the cgraph version chain, then creates the
   dispatcher node via make_dispatcher_decl when ifunc is supported.
   NOTE(review): lossy extraction -- braces, "first_v = node_v",
   break/return statements and the tail of the error message are elided;
   code kept byte-identical.  */
34954 ix86_get_function_versions_dispatcher (void *decl)
34956 tree fn = (tree) decl;
34957 struct cgraph_node *node = NULL;
34958 struct cgraph_node *default_node = NULL;
34959 struct cgraph_function_version_info *node_v = NULL;
34960 struct cgraph_function_version_info *first_v = NULL;
34962 tree dispatch_decl = NULL;
34964 struct cgraph_function_version_info *default_version_info = NULL;
34966 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
34968 node = cgraph_node::get (fn);
34969 gcc_assert (node != NULL);
34971 node_v = node->function_version ();
34972 gcc_assert (node_v != NULL);
/* Dispatcher already built for this version set: reuse it.  */
34974 if (node_v->dispatcher_resolver != NULL)
34975 return node_v->dispatcher_resolver;
34977 /* Find the default version and make it the first node. */
34979 /* Go to the beginning of the chain. */
34980 while (first_v->prev != NULL)
34981 first_v = first_v->prev;
34982 default_version_info = first_v;
/* Walk the chain until the default version is found (elided break).  */
34983 while (default_version_info != NULL)
34985 if (is_function_default_version
34986 (default_version_info->this_node->decl))
34988 default_version_info = default_version_info->next;
34991 /* If there is no default node, just return NULL. */
34992 if (default_version_info == NULL)
/* Unlink the default node and splice it in at the head of the chain.  */
34995 /* Make default info the first node. */
34996 if (first_v != default_version_info)
34998 default_version_info->prev->next = default_version_info->next;
34999 if (default_version_info->next)
35000 default_version_info->next->prev = default_version_info->prev;
35001 first_v->prev = default_version_info;
35002 default_version_info->next = first_v;
35003 default_version_info->prev = NULL;
35006 default_node = default_version_info->this_node;
35008 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35009 if (targetm.has_ifunc_p ())
35011 struct cgraph_function_version_info *it_v = NULL;
35012 struct cgraph_node *dispatcher_node = NULL;
35013 struct cgraph_function_version_info *dispatcher_version_info = NULL;
35015 /* Right now, the dispatching is done via ifunc. */
35016 dispatch_decl = make_dispatcher_decl (default_node->decl);
35018 dispatcher_node = cgraph_node::get_create (dispatch_decl);
35019 gcc_assert (dispatcher_node != NULL);
35020 dispatcher_node->dispatcher_function = 1;
35021 dispatcher_version_info
35022 = dispatcher_node->insert_new_function_version ();
35023 dispatcher_version_info->next = default_version_info;
35024 dispatcher_node->definition = 1;
35026 /* Set the dispatcher for all the versions. */
35027 it_v = default_version_info;
35028 while (it_v != NULL)
35030 it_v->dispatcher_resolver = dispatch_decl;
/* No ifunc support: multiversioning cannot work on this target.  */
35037 error_at (DECL_SOURCE_LOCATION (default_node->decl),
35038 "multiversioning needs ifunc which is not supported "
35042 return dispatch_decl;
35045 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
35049 make_attribute (const char *name, const char *arg_name, tree chain)
35052 tree attr_arg_name;
35056 attr_name = get_identifier (name);
35057 attr_arg_name = build_string (strlen (arg_name), arg_name);
35058 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
35059 attr = tree_cons (attr_name, attr_args, chain);
35063 /* Make the resolver function decl to dispatch the versions of
35064 a multi-versioned function, DEFAULT_DECL. Create an
35065 empty basic block in the resolver and store the pointer in
35066 EMPTY_BB. Return the decl of the resolver function. */
/* Create the resolver function for DISPATCH_DECL: a "(void *)()"
   function whose body (built later by dispatch_function_versions)
   selects among the versions of DEFAULT_DECL.  Stores an empty lowered
   basic block in *EMPTY_BB and marks DISPATCH_DECL as ifunc'ing to the
   resolver.
   NOTE(review): lossy extraction -- braces, "is_uniq = true", pop_cfun
   and the final "return decl" are elided; code kept byte-identical.  */
35069 make_resolver_func (const tree default_decl,
35070 const tree dispatch_decl,
35071 basic_block *empty_bb)
35073 char *resolver_name;
35074 tree decl, type, decl_name, t;
35075 bool is_uniq = false;
35077 /* IFUNC's have to be globally visible. So, if the default_decl is
35078 not, then the name of the IFUNC should be made unique. */
35079 if (TREE_PUBLIC (default_decl) == 0)
35082 /* Append the filename to the resolver function if the versions are
35083 not externally visible. This is because the resolver function has
35084 to be externally visible for the loader to find it. So, appending
35085 the filename will prevent conflicts with a resolver function from
35086 another module which is based on the same version name. */
35087 resolver_name = make_name (default_decl, "resolver", is_uniq);
35089 /* The resolver function should return a (void *). */
35090 type = build_function_type_list (ptr_type_node, NULL_TREE);
35092 decl = build_fn_decl (resolver_name, type);
35093 decl_name = get_identifier (resolver_name);
35094 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
35096 DECL_NAME (decl) = decl_name;
35097 TREE_USED (decl) = 1;
35098 DECL_ARTIFICIAL (decl) = 1;
35099 DECL_IGNORED_P (decl) = 0;
35100 /* IFUNC resolvers have to be externally visible. */
35101 TREE_PUBLIC (decl) = 1;
35102 DECL_UNINLINABLE (decl) = 1;
35104 /* Resolver is not external, body is generated. */
35105 DECL_EXTERNAL (decl) = 0;
35106 DECL_EXTERNAL (dispatch_decl) = 0;
35108 DECL_CONTEXT (decl) = NULL_TREE;
35109 DECL_INITIAL (decl) = make_node (BLOCK);
35110 DECL_STATIC_CONSTRUCTOR (decl) = 0;
35112 if (DECL_COMDAT_GROUP (default_decl)
35113 || TREE_PUBLIC (default_decl))
35115 /* In this case, each translation unit with a call to this
35116 versioned function will put out a resolver. Ensure it
35117 is comdat to keep just one copy. */
35118 DECL_COMDAT (decl) = 1;
35119 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
35121 /* Build result decl and add to function_decl. */
35122 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
35123 DECL_ARTIFICIAL (t) = 1;
35124 DECL_IGNORED_P (t) = 1;
35125 DECL_RESULT (decl) = t;
/* Gimplify and materialize an empty lowered body; register with the
   call graph so IPA passes see the resolver.  */
35127 gimplify_function_tree (decl);
35128 push_cfun (DECL_STRUCT_FUNCTION (decl));
35129 *empty_bb = init_lowered_empty_function (decl, false);
35131 cgraph_node::add_new_function (decl, true);
35132 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
35136 gcc_assert (dispatch_decl != NULL);
35137 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
35138 DECL_ATTRIBUTES (dispatch_decl)
35139 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
35141 /* Create the alias for dispatch to resolver here. */
35142 /*cgraph_create_function_alias (dispatch_decl, decl);*/
35143 cgraph_node::create_same_body_alias (dispatch_decl, decl);
35144 XDELETEVEC (resolver_name);
35148 /* Generate the dispatching code body to dispatch multi-versioned function
35149 DECL. The target hook is called to process the "target" attributes and
35150 provide the code to dispatch the right function at run-time. NODE points
35151 to the dispatcher decl whose body will be created. */
/* TARGET_GENERATE_VERSION_DISPATCHER_BODY hook: build the resolver body
   for the dispatcher cgraph node NODE_P and return the resolver decl.
   NOTE(review): lossy extraction -- braces and pop_cfun are elided;
   code kept byte-identical.  */
35154 ix86_generate_version_dispatcher_body (void *node_p)
35156 tree resolver_decl;
35157 basic_block empty_bb;
35158 tree default_ver_decl;
35159 struct cgraph_node *versn;
35160 struct cgraph_node *node;
35162 struct cgraph_function_version_info *node_version_info = NULL;
35163 struct cgraph_function_version_info *versn_info = NULL;
35165 node = (cgraph_node *)node_p;
35167 node_version_info = node->function_version ();
35168 gcc_assert (node->dispatcher_function
35169 && node_version_info != NULL);
/* Already generated: return the cached resolver.  */
35171 if (node_version_info->dispatcher_resolver)
35172 return node_version_info->dispatcher_resolver;
35174 /* The first version in the chain corresponds to the default version. */
35175 default_ver_decl = node_version_info->next->this_node->decl;
35177 /* node is going to be an alias, so remove the finalized bit. */
35178 node->definition = false;
35180 resolver_decl = make_resolver_func (default_ver_decl,
35181 node->decl, &empty_bb);
35183 node_version_info->dispatcher_resolver = resolver_decl;
35185 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
/* Collect every version decl; index 0 will be the default.  */
35187 auto_vec<tree, 2> fn_ver_vec;
35189 for (versn_info = node_version_info->next; versn_info;
35190 versn_info = versn_info->next)
35192 versn = versn_info->this_node;
35193 /* Check for virtual functions here again, as by this time it should
35194 have been determined if this function needs a vtable index or
35195 not. This happens for methods in derived classes that override
35196 virtual methods in base classes but are not explicitly marked as
35198 if (DECL_VINDEX (versn->decl))
35199 sorry ("Virtual function multiversioning not supported");
35201 fn_ver_vec.safe_push (versn->decl);
35204 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
/* The resolver body contains new calls; rebuild its cgraph edges.  */
35205 cgraph_edge::rebuild_edges ();
35207 return resolver_decl;
35209 /* This builds the processor_model struct type defined in
35210 libgcc/config/i386/cpuinfo.c */
35213 build_processor_model_struct (void)
/* Field names must match __processor_model in libgcc's cpuinfo.c so the
   folded builtins read the data libgcc populates.  */
35215 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
35217 tree field = NULL_TREE, field_chain = NULL_TREE;
35219 tree type = make_node (RECORD_TYPE);
35221 /* The first 3 fields are unsigned int. */
35222 for (i = 0; i < 3; ++i)
35224 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35225 get_identifier (field_name[i]), unsigned_type_node);
/* NOTE(review): fields are chained newest-first here; presumably
   finish_builtin_struct restores declaration order — confirm.  */
35226 if (field_chain != NULL_TREE)
35227 DECL_CHAIN (field) = field_chain;
35228 field_chain = field;
35231 /* The last field is an array of unsigned integers of size one. */
35232 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35233 get_identifier (field_name[3]),
35234 build_array_type (unsigned_type_node,
35235 build_index_type (size_one_node)));
35236 if (field_chain != NULL_TREE)
35237 DECL_CHAIN (field) = field_chain;
35238 field_chain = field;
35240 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35244 /* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */
35247 make_var_decl (tree type, const char *name)
35251 new_decl = build_decl (UNKNOWN_LOCATION,
35253 get_identifier(name),
/* The variable is defined in libgcc; here it is only referenced, so it
   is extern/public with no initializer.  */
35256 DECL_EXTERNAL (new_decl) = 1;
35257 TREE_STATIC (new_decl) = 1;
35258 TREE_PUBLIC (new_decl) = 1;
35259 DECL_INITIAL (new_decl) = 0;
35260 DECL_ARTIFICIAL (new_decl) = 0;
/* Keep the decl even if it appears otherwise unused.  */
35261 DECL_PRESERVE_P (new_decl) = 1;
/* one-only (comdat) so multiple units referencing it can be merged.  */
35263 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35264 assemble_variable (new_decl, 0, 0, 0);
35269 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
35270 into an integer defined in libgcc/config/i386/cpuinfo.c */
35273 fold_builtin_cpu (tree fndecl, tree *args)
35276 enum ix86_builtins fn_code = (enum ix86_builtins)
35277 DECL_FUNCTION_CODE (fndecl);
35278 tree param_string_cst = NULL;
35280 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
35281 enum processor_features
35302 /* These are the values for vendor types and cpu types and subtypes
35303 in cpuinfo.c. Cpu types and subtypes should be subtracted by
35304 the corresponding start value. */
35305 enum processor_model
35315 M_INTEL_SILVERMONT,
35319 M_CPU_SUBTYPE_START,
35320 M_INTEL_COREI7_NEHALEM,
35321 M_INTEL_COREI7_WESTMERE,
35322 M_INTEL_COREI7_SANDYBRIDGE,
35323 M_AMDFAM10H_BARCELONA,
35324 M_AMDFAM10H_SHANGHAI,
35325 M_AMDFAM10H_ISTANBUL,
35326 M_AMDFAM15H_BDVER1,
35327 M_AMDFAM15H_BDVER2,
35328 M_AMDFAM15H_BDVER3,
35329 M_AMDFAM15H_BDVER4,
35330 M_INTEL_COREI7_IVYBRIDGE,
35331 M_INTEL_COREI7_HASWELL
/* Mapping from the string accepted by __builtin_cpu_is to the
   processor_model value stored by libgcc.  */
35334 static struct _arch_names_table
35336 const char *const name;
35337 const enum processor_model model;
35339 const arch_names_table[] =
35342 {"intel", M_INTEL},
35343 {"atom", M_INTEL_BONNELL},
35344 {"slm", M_INTEL_SILVERMONT},
35345 {"core2", M_INTEL_CORE2},
35346 {"corei7", M_INTEL_COREI7},
35347 {"nehalem", M_INTEL_COREI7_NEHALEM},
35348 {"westmere", M_INTEL_COREI7_WESTMERE},
35349 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35350 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35351 {"haswell", M_INTEL_COREI7_HASWELL},
35352 {"bonnell", M_INTEL_BONNELL},
35353 {"silvermont", M_INTEL_SILVERMONT},
35354 {"knl", M_INTEL_KNL},
35355 {"amdfam10h", M_AMDFAM10H},
35356 {"barcelona", M_AMDFAM10H_BARCELONA},
35357 {"shanghai", M_AMDFAM10H_SHANGHAI},
35358 {"istanbul", M_AMDFAM10H_ISTANBUL},
35359 {"btver1", M_AMD_BTVER1},
35360 {"amdfam15h", M_AMDFAM15H},
35361 {"bdver1", M_AMDFAM15H_BDVER1},
35362 {"bdver2", M_AMDFAM15H_BDVER2},
35363 {"bdver3", M_AMDFAM15H_BDVER3},
35364 {"bdver4", M_AMDFAM15H_BDVER4},
35365 {"btver2", M_AMD_BTVER2},
/* Mapping from the string accepted by __builtin_cpu_supports to the
   feature bit index in __cpu_model.__cpu_features[0].  */
35368 static struct _isa_names_table
35370 const char *const name;
35371 const enum processor_features feature;
35373 const isa_names_table[] =
35377 {"popcnt", F_POPCNT},
35381 {"ssse3", F_SSSE3},
35382 {"sse4a", F_SSE4_A},
35383 {"sse4.1", F_SSE4_1},
35384 {"sse4.2", F_SSE4_2},
35390 {"avx512f",F_AVX512F}
35393 tree __processor_model_type = build_processor_model_struct ();
35394 tree __cpu_model_var = make_var_decl (__processor_model_type,
35398 varpool_node::add (__cpu_model_var);
35400 gcc_assert ((args != NULL) && (*args != NULL));
/* Peel wrapping expressions (e.g. NOP_EXPRs) off the argument until the
   underlying STRING_CST is reached.  */
35402 param_string_cst = *args;
35403 while (param_string_cst
35404 && TREE_CODE (param_string_cst) != STRING_CST)
35406 /* *args must be a expr that can contain other EXPRS leading to a
35408 if (!EXPR_P (param_string_cst))
35410 error ("Parameter to builtin must be a string constant or literal");
35411 return integer_zero_node;
35413 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35416 gcc_assert (param_string_cst);
35418 if (fn_code == IX86_BUILTIN_CPU_IS)
35424 unsigned int field_val = 0;
35425 unsigned int NUM_ARCH_NAMES
35426 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
/* Linear search of the arch table for the requested name.  */
35428 for (i = 0; i < NUM_ARCH_NAMES; i++)
35429 if (strcmp (arch_names_table[i].name,
35430 TREE_STRING_POINTER (param_string_cst)) == 0)
35433 if (i == NUM_ARCH_NAMES)
35435 error ("Parameter to builtin not valid: %s",
35436 TREE_STRING_POINTER (param_string_cst));
35437 return integer_zero_node;
35440 field = TYPE_FIELDS (__processor_model_type);
35441 field_val = arch_names_table[i].model;
35443 /* CPU types are stored in the next field. */
35444 if (field_val > M_CPU_TYPE_START
35445 && field_val < M_CPU_SUBTYPE_START)
35447 field = DECL_CHAIN (field);
35448 field_val -= M_CPU_TYPE_START;
35451 /* CPU subtypes are stored in the next field. */
35452 if (field_val > M_CPU_SUBTYPE_START)
35454 field = DECL_CHAIN ( DECL_CHAIN (field));
35455 field_val -= M_CPU_SUBTYPE_START;
35458 /* Get the appropriate field in __cpu_model. */
35459 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35462 /* Check the value. */
35463 final = build2 (EQ_EXPR, unsigned_type_node, ref,
35464 build_int_cstu (unsigned_type_node, field_val));
35465 return build1 (CONVERT_EXPR, integer_type_node, final);
35467 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35474 unsigned int field_val = 0;
35475 unsigned int NUM_ISA_NAMES
35476 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35478 for (i = 0; i < NUM_ISA_NAMES; i++)
35479 if (strcmp (isa_names_table[i].name,
35480 TREE_STRING_POINTER (param_string_cst)) == 0)
35483 if (i == NUM_ISA_NAMES)
35485 error ("Parameter to builtin not valid: %s",
35486 TREE_STRING_POINTER (param_string_cst));
35487 return integer_zero_node;
35490 field = TYPE_FIELDS (__processor_model_type);
35491 /* Get the last field, which is __cpu_features. */
35492 while (DECL_CHAIN (field))
35493 field = DECL_CHAIN (field);
35495 /* Get the appropriate field: __cpu_model.__cpu_features */
35496 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35499 /* Access the 0th element of __cpu_features array. */
35500 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35501 integer_zero_node, NULL_TREE, NULL_TREE);
/* Test the single feature bit for the requested ISA.  */
35503 field_val = (1 << isa_names_table[i].feature);
35504 /* Return __cpu_model.__cpu_features[0] & field_val */
35505 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35506 build_int_cstu (unsigned_type_node, field_val));
35507 return build1 (CONVERT_EXPR, integer_type_node, final);
/* Only CPU_IS / CPU_SUPPORTS are routed here (see ix86_fold_builtin).  */
35509 gcc_unreachable ();
/* Fold a machine-dependent builtin at GIMPLE/GENERIC level.  Only the
   CPU detection builtins are folded here; everything else falls through
   to the subtarget hook (if any).  */
35513 ix86_fold_builtin (tree fndecl, int n_args,
35514 tree *args, bool ignore ATTRIBUTE_UNUSED)
35516 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35518 enum ix86_builtins fn_code = (enum ix86_builtins)
35519 DECL_FUNCTION_CODE (fndecl);
35520 if (fn_code == IX86_BUILTIN_CPU_IS
35521 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
/* Both builtins take exactly one (string) argument.  */
35523 gcc_assert (n_args == 1);
35524 return fold_builtin_cpu (fndecl, args);
35528 #ifdef SUBTARGET_FOLD_BUILTIN
35529 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35535 /* Make builtins to detect cpu type and features supported. NAME is
35536 the builtin name, CODE is the builtin code, and FTYPE is the function
35537 type of the builtin. */
35540 make_cpu_type_builtin (const char* name, int code,
35541 enum ix86_builtin_func_type ftype, bool is_const)
35546 type = ix86_get_builtin_func_type (ftype);
35547 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35549 gcc_assert (decl != NULL_TREE);
/* Register the decl so expanders can find it by builtin code.  */
35550 ix86_builtins[(int) code] = decl;
/* IS_CONST marks pure query builtins (cpu_is/cpu_supports) readonly.  */
35551 TREE_READONLY (decl) = is_const;
35554 /* Make builtins to get CPU type and features supported. The created
35557 __builtin_cpu_init (), to detect cpu type and features,
35558 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
35559 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
35563 ix86_init_platform_type_builtins (void)
/* cpu_init has side effects (fills __cpu_model), so it is not const.  */
35565 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35566 INT_FTYPE_VOID, false);
/* The two query builtins are pure reads and are marked const.  */
35567 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35568 INT_FTYPE_PCCHAR, true);
35569 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35570 INT_FTYPE_PCCHAR, true);
35573 /* Internal method for ix86_init_builtins. */
/* Registers ms_abi/sysv_abi-specific va_start/va_end/va_copy builtins so
   each calling convention gets a matching varargs implementation.  */
35576 ix86_init_builtins_va_builtins_abi (void)
35578 tree ms_va_ref, sysv_va_ref;
35579 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35580 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35581 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35582 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
/* Attribute lists that pin each builtin to its ABI.  */
35586 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35587 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35588 ms_va_ref = build_reference_type (ms_va_list_type_node);
35590 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
35593 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35594 fnvoid_va_start_ms =
35595 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35596 fnvoid_va_end_sysv =
35597 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35598 fnvoid_va_start_sysv =
35599 build_varargs_function_type_list (void_type_node, sysv_va_ref,
35601 fnvoid_va_copy_ms =
35602 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35604 fnvoid_va_copy_sysv =
35605 build_function_type_list (void_type_node, sysv_va_ref,
35606 sysv_va_ref, NULL_TREE);
/* All six variants reuse the generic BUILT_IN_VA_* codes; the attribute
   list is what differentiates the ABI-specific behavior.  */
35608 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35609 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35610 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35611 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35612 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35613 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35614 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35615 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35616 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35617 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35618 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35619 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Register the x86-specific floating-point types __float80 and
   __float128 with the front end.  */
35623 ix86_init_builtin_types (void)
35625 tree float128_type_node, float80_type_node;
35627 /* The __float80 type. */
/* Reuse long double when it is already the 80-bit extended type;
   otherwise build a distinct 80-bit REAL_TYPE.  */
35628 float80_type_node = long_double_type_node;
35629 if (TYPE_MODE (float80_type_node) != XFmode)
35631 /* The __float80 type. */
35632 float80_type_node = make_node (REAL_TYPE);
35634 TYPE_PRECISION (float80_type_node) = 80;
35635 layout_type (float80_type_node);
35637 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35639 /* The __float128 type. */
35640 float128_type_node = make_node (REAL_TYPE);
35641 TYPE_PRECISION (float128_type_node) = 128;
35642 layout_type (float128_type_node);
35643 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35645 /* This macro is built by i386-builtin-types.awk. */
35646 DEFINE_BUILTIN_PRIMITIVE_TYPES;
/* Top-level registration of all i386 target builtins: types first, then
   CPU-detection, TFmode helpers, TM, MMX/SSE, MPX and the ABI-specific
   varargs builtins.  */
35650 ix86_init_builtins (void)
35654 ix86_init_builtin_types ();
35656 /* Builtins to get CPU type and features. */
35657 ix86_init_platform_type_builtins ();
35659 /* TFmode support builtins. */
35660 def_builtin_const (0, "__builtin_infq",
35661 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
35662 def_builtin_const (0, "__builtin_huge_valq",
35663 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
35665 /* We will expand them to normal call if SSE isn't available since
35666 they are used by libgcc. */
35667 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
/* "__fabstf2" is the libgcc fallback name used when expanded as a call.  */
35668 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
35669 BUILT_IN_MD, "__fabstf2", NULL_TREE);
35670 TREE_READONLY (t) = 1;
35671 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
35673 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
35674 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
35675 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
35676 TREE_READONLY (t) = 1;
35677 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
35679 ix86_init_tm_builtins ();
35680 ix86_init_mmx_sse_builtins ();
35681 ix86_init_mpx_builtins ();
35684 ix86_init_builtins_va_builtins_abi ();
/* Allow subtargets (e.g. mingw) to add their own builtins.  */
35686 #ifdef SUBTARGET_INIT_BUILTINS
35687 SUBTARGET_INIT_BUILTINS;
35691 /* Return the ix86 builtin for CODE. */
35694 ix86_builtin_decl (unsigned code, bool)
35696 if (code >= IX86_BUILTIN_MAX)
35697 return error_mark_node;
35699 return ix86_builtins[code];
35702 /* Errors in the source file can cause expand_expr to return const0_rtx
35703 where we expect a vector. To avoid crashing, use one of the vector
35704 clear instructions. */
35706 safe_vector_operand (rtx x, machine_mode mode)
35708 if (x == const0_rtx)
35709 x = CONST0_RTX (mode);
35713 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
35716 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
35719 tree arg0 = CALL_EXPR_ARG (exp, 0);
35720 tree arg1 = CALL_EXPR_ARG (exp, 1);
35721 rtx op0 = expand_normal (arg0);
35722 rtx op1 = expand_normal (arg1);
35723 machine_mode tmode = insn_data[icode].operand[0].mode;
35724 machine_mode mode0 = insn_data[icode].operand[1].mode;
35725 machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Replace const0_rtx by a proper zero vector where a vector is needed.  */
35727 if (VECTOR_MODE_P (mode0))
35728 op0 = safe_vector_operand (op0, mode0);
35729 if (VECTOR_MODE_P (mode1))
35730 op1 = safe_vector_operand (op1, mode1);
/* Get a fresh pseudo for the result unless TARGET already fits.  */
35732 if (optimize || !target
35733 || GET_MODE (target) != tmode
35734 || !insn_data[icode].operand[0].predicate (target, tmode))
35735 target = gen_reg_rtx (tmode);
/* An SImode operand feeding a TImode insn operand is widened by loading
   it into the low element of a V4SI register.  */
35737 if (GET_MODE (op1) == SImode && mode1 == TImode)
35739 rtx x = gen_reg_rtx (V4SImode);
35740 emit_insn (gen_sse2_loadd (x, op1));
35741 op1 = gen_lowpart (TImode, x);
35744 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35745 op0 = copy_to_mode_reg (mode0, op0);
35746 if (!insn_data[icode].operand[2].predicate (op1, mode1))
35747 op1 = copy_to_mode_reg (mode1, op1);
35749 pat = GEN_FCN (icode) (target, op0, op1);
35758 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
35761 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
35762 enum ix86_builtin_func_type m_type,
35763 enum rtx_code sub_code)
35768 bool comparison_p = false;
35770 bool last_arg_constant = false;
35771 int num_memory = 0;
35777 machine_mode tmode = insn_data[icode].operand[0].mode;
/* Classify M_TYPE: how many args, whether the last is a constant
   immediate, and whether the insn embeds a comparison code.  */
35781 case MULTI_ARG_4_DF2_DI_I:
35782 case MULTI_ARG_4_DF2_DI_I1:
35783 case MULTI_ARG_4_SF2_SI_I:
35784 case MULTI_ARG_4_SF2_SI_I1:
35786 last_arg_constant = true;
35789 case MULTI_ARG_3_SF:
35790 case MULTI_ARG_3_DF:
35791 case MULTI_ARG_3_SF2:
35792 case MULTI_ARG_3_DF2:
35793 case MULTI_ARG_3_DI:
35794 case MULTI_ARG_3_SI:
35795 case MULTI_ARG_3_SI_DI:
35796 case MULTI_ARG_3_HI:
35797 case MULTI_ARG_3_HI_SI:
35798 case MULTI_ARG_3_QI:
35799 case MULTI_ARG_3_DI2:
35800 case MULTI_ARG_3_SI2:
35801 case MULTI_ARG_3_HI2:
35802 case MULTI_ARG_3_QI2:
35806 case MULTI_ARG_2_SF:
35807 case MULTI_ARG_2_DF:
35808 case MULTI_ARG_2_DI:
35809 case MULTI_ARG_2_SI:
35810 case MULTI_ARG_2_HI:
35811 case MULTI_ARG_2_QI:
35815 case MULTI_ARG_2_DI_IMM:
35816 case MULTI_ARG_2_SI_IMM:
35817 case MULTI_ARG_2_HI_IMM:
35818 case MULTI_ARG_2_QI_IMM:
35820 last_arg_constant = true;
35823 case MULTI_ARG_1_SF:
35824 case MULTI_ARG_1_DF:
35825 case MULTI_ARG_1_SF2:
35826 case MULTI_ARG_1_DF2:
35827 case MULTI_ARG_1_DI:
35828 case MULTI_ARG_1_SI:
35829 case MULTI_ARG_1_HI:
35830 case MULTI_ARG_1_QI:
35831 case MULTI_ARG_1_SI_DI:
35832 case MULTI_ARG_1_HI_DI:
35833 case MULTI_ARG_1_HI_SI:
35834 case MULTI_ARG_1_QI_DI:
35835 case MULTI_ARG_1_QI_SI:
35836 case MULTI_ARG_1_QI_HI:
35840 case MULTI_ARG_2_DI_CMP:
35841 case MULTI_ARG_2_SI_CMP:
35842 case MULTI_ARG_2_HI_CMP:
35843 case MULTI_ARG_2_QI_CMP:
35845 comparison_p = true;
35848 case MULTI_ARG_2_SF_TF:
35849 case MULTI_ARG_2_DF_TF:
35850 case MULTI_ARG_2_DI_TF:
35851 case MULTI_ARG_2_SI_TF:
35852 case MULTI_ARG_2_HI_TF:
35853 case MULTI_ARG_2_QI_TF:
35859 gcc_unreachable ();
35862 if (optimize || !target
35863 || GET_MODE (target) != tmode
35864 || !insn_data[icode].operand[0].predicate (target, tmode))
35865 target = gen_reg_rtx (tmode);
35867 gcc_assert (nargs <= 4);
/* Expand each call argument into an operand acceptable to the insn.
   With comparison insns, operand 1 is the comparison itself, so the
   argument operands shift up by one (ADJUST).  */
35869 for (i = 0; i < nargs; i++)
35871 tree arg = CALL_EXPR_ARG (exp, i);
35872 rtx op = expand_normal (arg);
35873 int adjust = (comparison_p) ? 1 : 0;
35874 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
35876 if (last_arg_constant && i == nargs - 1)
35878 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
35880 enum insn_code new_icode = icode;
35883 case CODE_FOR_xop_vpermil2v2df3:
35884 case CODE_FOR_xop_vpermil2v4sf3:
35885 case CODE_FOR_xop_vpermil2v4df3:
35886 case CODE_FOR_xop_vpermil2v8sf3:
35887 error ("the last argument must be a 2-bit immediate");
35888 return gen_reg_rtx (tmode);
/* XOP rotates with an out-of-range count fall back to the generic
   rotate patterns, masking the count to the element width.  */
35889 case CODE_FOR_xop_rotlv2di3:
35890 new_icode = CODE_FOR_rotlv2di3;
35892 case CODE_FOR_xop_rotlv4si3:
35893 new_icode = CODE_FOR_rotlv4si3;
35895 case CODE_FOR_xop_rotlv8hi3:
35896 new_icode = CODE_FOR_rotlv8hi3;
35898 case CODE_FOR_xop_rotlv16qi3:
35899 new_icode = CODE_FOR_rotlv16qi3;
35901 if (CONST_INT_P (op))
35903 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
35904 op = GEN_INT (INTVAL (op) & mask);
35905 gcc_checking_assert
35906 (insn_data[icode].operand[i + 1].predicate (op, mode));
/* The substituted pattern must agree with the original in modes
   and predicates for the substitution to be safe.  */
35910 gcc_checking_assert
35912 && insn_data[new_icode].operand[0].mode == tmode
35913 && insn_data[new_icode].operand[1].mode == tmode
35914 && insn_data[new_icode].operand[2].mode == mode
35915 && insn_data[new_icode].operand[0].predicate
35916 == insn_data[icode].operand[0].predicate
35917 && insn_data[new_icode].operand[1].predicate
35918 == insn_data[icode].operand[1].predicate);
35924 gcc_unreachable ();
35931 if (VECTOR_MODE_P (mode))
35932 op = safe_vector_operand (op, mode);
35934 /* If we aren't optimizing, only allow one memory operand to be
35936 if (memory_operand (op, mode))
35939 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
35942 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
35944 op = force_reg (mode, op);
35948 args[i].mode = mode;
/* Emit the insn with the operand count (and comparison rtx, when the
   insn embeds one) dictated by the classification above.  */
35954 pat = GEN_FCN (icode) (target, args[0].op);
35959 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
35960 GEN_INT ((int)sub_code));
35961 else if (! comparison_p)
35962 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
35965 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
35969 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
35974 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
35978 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
35982 gcc_unreachable ();
35992 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
35993 insns with vec_merge. */
35996 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
36000 tree arg0 = CALL_EXPR_ARG (exp, 0);
36001 rtx op1, op0 = expand_normal (arg0);
36002 machine_mode tmode = insn_data[icode].operand[0].mode;
36003 machine_mode mode0 = insn_data[icode].operand[1].mode;
36005 if (optimize || !target
36006 || GET_MODE (target) != tmode
36007 || !insn_data[icode].operand[0].predicate (target, tmode))
36008 target = gen_reg_rtx (tmode);
36010 if (VECTOR_MODE_P (mode0))
36011 op0 = safe_vector_operand (op0, mode0);
36013 if ((optimize && !register_operand (op0, mode0))
36014 || !insn_data[icode].operand[1].predicate (op0, mode0))
36015 op0 = copy_to_mode_reg (mode0, op0);
/* The vec_merge pattern reuses the source as the pass-through operand.  */
36018 if (!insn_data[icode].operand[2].predicate (op1, mode0))
36019 op1 = copy_to_mode_reg (mode0, op1);
36021 pat = GEN_FCN (icode) (target, op0, op1);
36028 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
36031 ix86_expand_sse_compare (const struct builtin_description *d,
36032 tree exp, rtx target, bool swap)
36035 tree arg0 = CALL_EXPR_ARG (exp, 0);
36036 tree arg1 = CALL_EXPR_ARG (exp, 1);
36037 rtx op0 = expand_normal (arg0);
36038 rtx op1 = expand_normal (arg1);
36040 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36041 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36042 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36043 enum rtx_code comparison = d->comparison;
36045 if (VECTOR_MODE_P (mode0))
36046 op0 = safe_vector_operand (op0, mode0);
36047 if (VECTOR_MODE_P (mode1))
36048 op1 = safe_vector_operand (op1, mode1);
36050 /* Swap operands if we have a comparison that isn't available in
36053 std::swap (op0, op1);
36055 if (optimize || !target
36056 || GET_MODE (target) != tmode
36057 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36058 target = gen_reg_rtx (tmode);
36060 if ((optimize && !register_operand (op0, mode0))
36061 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
36062 op0 = copy_to_mode_reg (mode0, op0);
36063 if ((optimize && !register_operand (op1, mode1))
36064 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
36065 op1 = copy_to_mode_reg (mode1, op1);
/* The insn takes the comparison rtx itself as an explicit operand.  */
36067 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
36068 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36075 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
36078 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
36082 tree arg0 = CALL_EXPR_ARG (exp, 0);
36083 tree arg1 = CALL_EXPR_ARG (exp, 1);
36084 rtx op0 = expand_normal (arg0);
36085 rtx op1 = expand_normal (arg1);
36086 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36087 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36088 enum rtx_code comparison = d->comparison;
36090 if (VECTOR_MODE_P (mode0))
36091 op0 = safe_vector_operand (op0, mode0);
36092 if (VECTOR_MODE_P (mode1))
36093 op1 = safe_vector_operand (op1, mode1);
36095 /* Swap operands if we have a comparison that isn't available in
36097 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
36098 std::swap (op0, op1);
/* Result is built as: zero an SImode pseudo, then set only its QImode
   low part from the flags, and return the SImode register.  */
36100 target = gen_reg_rtx (SImode);
36101 emit_move_insn (target, const0_rtx);
36102 target = gen_rtx_SUBREG (QImode, target, 0);
36104 if ((optimize && !register_operand (op0, mode0))
36105 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36106 op0 = copy_to_mode_reg (mode0, op0);
36107 if ((optimize && !register_operand (op1, mode1))
36108 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36109 op1 = copy_to_mode_reg (mode1, op1);
36111 pat = GEN_FCN (d->icode) (op0, op1);
/* Materialize the comparison of the flags into the low byte.  */
36115 emit_insn (gen_rtx_SET (VOIDmode,
36116 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36117 gen_rtx_fmt_ee (comparison, QImode,
36121 return SUBREG_REG (target);
36124 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
36127 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
36131 tree arg0 = CALL_EXPR_ARG (exp, 0);
36132 rtx op1, op0 = expand_normal (arg0);
36133 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36134 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36136 if (optimize || target == 0
36137 || GET_MODE (target) != tmode
36138 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36139 target = gen_reg_rtx (tmode);
36141 if (VECTOR_MODE_P (mode0))
36142 op0 = safe_vector_operand (op0, mode0);
36144 if ((optimize && !register_operand (op0, mode0))
36145 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36146 op0 = copy_to_mode_reg (mode0, op0);
/* For round insns d->comparison carries the rounding-mode immediate.  */
36148 op1 = GEN_INT (d->comparison);
36150 pat = GEN_FCN (d->icode) (target, op0, op1);
/* Like ix86_expand_sse_round, but for the two-source round-and-pack
   variants; d->comparison again supplies the rounding immediate.  */
36158 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
36159 tree exp, rtx target)
36162 tree arg0 = CALL_EXPR_ARG (exp, 0);
36163 tree arg1 = CALL_EXPR_ARG (exp, 1);
36164 rtx op0 = expand_normal (arg0);
36165 rtx op1 = expand_normal (arg1);
36167 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36168 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36169 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36171 if (optimize || target == 0
36172 || GET_MODE (target) != tmode
36173 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36174 target = gen_reg_rtx (tmode);
36176 op0 = safe_vector_operand (op0, mode0);
36177 op1 = safe_vector_operand (op1, mode1);
/* NOTE(review): predicates are checked via operand[0]/operand[1] while
   the modes come from operand[1]/operand[2]; sibling expanders use the
   input-operand predicates here — confirm this asymmetry is intended.  */
36179 if ((optimize && !register_operand (op0, mode0))
36180 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36181 op0 = copy_to_mode_reg (mode0, op0);
36182 if ((optimize && !register_operand (op1, mode1))
36183 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36184 op1 = copy_to_mode_reg (mode1, op1);
36186 op2 = GEN_INT (d->comparison);
36188 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36195 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
36198 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
36202 tree arg0 = CALL_EXPR_ARG (exp, 0);
36203 tree arg1 = CALL_EXPR_ARG (exp, 1);
36204 rtx op0 = expand_normal (arg0);
36205 rtx op1 = expand_normal (arg1);
36206 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36207 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36208 enum rtx_code comparison = d->comparison;
36210 if (VECTOR_MODE_P (mode0))
36211 op0 = safe_vector_operand (op0, mode0);
36212 if (VECTOR_MODE_P (mode1))
36213 op1 = safe_vector_operand (op1, mode1);
/* Same flags-to-int idiom as ix86_expand_sse_comi: zero an SImode
   pseudo and set its low byte from the comparison of the flags.  */
36215 target = gen_reg_rtx (SImode);
36216 emit_move_insn (target, const0_rtx);
36217 target = gen_rtx_SUBREG (QImode, target, 0);
36219 if ((optimize && !register_operand (op0, mode0))
36220 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36221 op0 = copy_to_mode_reg (mode0, op0);
36222 if ((optimize && !register_operand (op1, mode1))
36223 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36224 op1 = copy_to_mode_reg (mode1, op1);
36226 pat = GEN_FCN (d->icode) (op0, op1);
36230 emit_insn (gen_rtx_SET (VOIDmode,
36231 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36232 gen_rtx_fmt_ee (comparison, QImode,
36236 return SUBREG_REG (target);
36239 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
36242 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36243 tree exp, rtx target)
36246 tree arg0 = CALL_EXPR_ARG (exp, 0);
36247 tree arg1 = CALL_EXPR_ARG (exp, 1);
36248 tree arg2 = CALL_EXPR_ARG (exp, 2);
36249 tree arg3 = CALL_EXPR_ARG (exp, 3);
36250 tree arg4 = CALL_EXPR_ARG (exp, 4);
36251 rtx scratch0, scratch1;
36252 rtx op0 = expand_normal (arg0);
36253 rtx op1 = expand_normal (arg1);
36254 rtx op2 = expand_normal (arg2);
36255 rtx op3 = expand_normal (arg3);
36256 rtx op4 = expand_normal (arg4);
36257 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
/* The pattern has two outputs (index and mask), two vector inputs with
   explicit lengths, and a control immediate.  */
36259 tmode0 = insn_data[d->icode].operand[0].mode;
36260 tmode1 = insn_data[d->icode].operand[1].mode;
36261 modev2 = insn_data[d->icode].operand[2].mode;
36262 modei3 = insn_data[d->icode].operand[3].mode;
36263 modev4 = insn_data[d->icode].operand[4].mode;
36264 modei5 = insn_data[d->icode].operand[5].mode;
36265 modeimm = insn_data[d->icode].operand[6].mode;
36267 if (VECTOR_MODE_P (modev2))
36268 op0 = safe_vector_operand (op0, modev2);
36269 if (VECTOR_MODE_P (modev4))
36270 op2 = safe_vector_operand (op2, modev4);
36272 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36273 op0 = copy_to_mode_reg (modev2, op0);
36274 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36275 op1 = copy_to_mode_reg (modei3, op1);
36276 if ((optimize && !register_operand (op2, modev4))
36277 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36278 op2 = copy_to_mode_reg (modev4, op2);
36279 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36280 op3 = copy_to_mode_reg (modei5, op3);
36282 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36284 error ("the fifth argument must be an 8-bit immediate");
/* Depending on the builtin, the wanted result is the index (ESTRI), the
   mask (ESTRM), or one of the flag bits (remaining variants).  The
   unwanted output goes into a scratch register.  */
36288 if (d->code == IX86_BUILTIN_PCMPESTRI128)
36290 if (optimize || !target
36291 || GET_MODE (target) != tmode0
36292 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36293 target = gen_reg_rtx (tmode0);
36295 scratch1 = gen_reg_rtx (tmode1);
36297 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
36299 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36301 if (optimize || !target
36302 || GET_MODE (target) != tmode1
36303 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36304 target = gen_reg_rtx (tmode1);
36306 scratch0 = gen_reg_rtx (tmode0);
36308 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
36312 gcc_assert (d->flag);
36314 scratch0 = gen_reg_rtx (tmode0);
36315 scratch1 = gen_reg_rtx (tmode1);
36317 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
/* Flag variants: read the requested flags register (d->flag) into the
   low byte of a zeroed SImode pseudo.  */
36327 target = gen_reg_rtx (SImode);
36328 emit_move_insn (target, const0_rtx);
36329 target = gen_rtx_SUBREG (QImode, target, 0);
36332 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36333 gen_rtx_fmt_ee (EQ, QImode,
36334 gen_rtx_REG ((machine_mode) d->flag,
36337 return SUBREG_REG (target);
36344 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
36347 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36348 tree exp, rtx target)
36351 tree arg0 = CALL_EXPR_ARG (exp, 0);
36352 tree arg1 = CALL_EXPR_ARG (exp, 1);
36353 tree arg2 = CALL_EXPR_ARG (exp, 2);
36354 rtx scratch0, scratch1;
36355 rtx op0 = expand_normal (arg0);
36356 rtx op1 = expand_normal (arg1);
36357 rtx op2 = expand_normal (arg2);
36358 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
/* Implicit-length variant: two outputs, two vector inputs, and a
   control immediate (no explicit length operands).  */
36360 tmode0 = insn_data[d->icode].operand[0].mode;
36361 tmode1 = insn_data[d->icode].operand[1].mode;
36362 modev2 = insn_data[d->icode].operand[2].mode;
36363 modev3 = insn_data[d->icode].operand[3].mode;
36364 modeimm = insn_data[d->icode].operand[4].mode;
36366 if (VECTOR_MODE_P (modev2))
36367 op0 = safe_vector_operand (op0, modev2);
36368 if (VECTOR_MODE_P (modev3))
36369 op1 = safe_vector_operand (op1, modev3);
36371 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36372 op0 = copy_to_mode_reg (modev2, op0);
36373 if ((optimize && !register_operand (op1, modev3))
36374 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36375 op1 = copy_to_mode_reg (modev3, op1);
36377 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36379 error ("the third argument must be an 8-bit immediate")
/* As with pcmpestr: pick the index, the mask, or a flag bit as the
   result; the unused output lands in a scratch register.  */
36383 if (d->code == IX86_BUILTIN_PCMPISTRI128)
36385 if (optimize || !target
36386 || GET_MODE (target) != tmode0
36387 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36388 target = gen_reg_rtx (tmode0);
36390 scratch1 = gen_reg_rtx (tmode1);
36392 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
36394 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36396 if (optimize || !target
36397 || GET_MODE (target) != tmode1
36398 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36399 target = gen_reg_rtx (tmode1);
36401 scratch0 = gen_reg_rtx (tmode0);
36403 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
36407 gcc_assert (d->flag);
36409 scratch0 = gen_reg_rtx (tmode0);
36410 scratch1 = gen_reg_rtx (tmode1);
36412 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
36422 target = gen_reg_rtx (SImode);
36423 emit_move_insn (target, const0_rtx);
36424 target = gen_rtx_SUBREG (QImode, target, 0);
36427 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36428 gen_rtx_fmt_ee (EQ, QImode,
36429 gen_rtx_REG ((machine_mode) d->flag,
36432 return SUBREG_REG (target);
/* Subroutine of ix86_expand_builtin to take care of insns with
   variable number of operands.  D describes the builtin: D->icode is
   the insn to emit, D->flag encodes the builtin's function prototype
   and D->comparison carries a comparison code for SSE compare
   builtins.  EXP is the CALL_EXPR being expanded; TARGET is a
   suggested result rtx and may be reused when it has the right mode.  */
ix86_expand_args_builtin (const struct builtin_description *d,
			  tree exp, rtx target)
  rtx pat, real_target;
  unsigned int i, nargs;
  unsigned int nargs_constant = 0;
  unsigned int mask_pos = 0;
  int num_memory = 0;
  bool last_arg_count = false;
  enum insn_code icode = d->icode;
  const struct insn_data_d *insn_p = &insn_data[icode];
  machine_mode tmode = insn_p->operand[0].mode;
  machine_mode rmode = VOIDmode;
  enum rtx_code comparison = d->comparison;

  /* Classify the builtin's prototype: each case either tail-calls a
     dedicated expander, or records how many trailing operands are
     immediates (nargs_constant) and whether the last argument is a
     shift count (last_arg_count), before falling into the generic
     expansion below.  */
  switch ((enum ix86_builtin_func_type) d->flag)
    case V2DF_FTYPE_V2DF_ROUND:
    case V4DF_FTYPE_V4DF_ROUND:
    case V4SF_FTYPE_V4SF_ROUND:
    case V8SF_FTYPE_V8SF_ROUND:
    case V4SI_FTYPE_V4SF_ROUND:
    case V8SI_FTYPE_V8SF_ROUND:
      return ix86_expand_sse_round (d, exp, target);
    case V4SI_FTYPE_V2DF_V2DF_ROUND:
    case V8SI_FTYPE_V4DF_V4DF_ROUND:
    case V16SI_FTYPE_V8DF_V8DF_ROUND:
      return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
    case INT_FTYPE_V8SF_V8SF_PTEST:
    case INT_FTYPE_V4DI_V4DI_PTEST:
    case INT_FTYPE_V4DF_V4DF_PTEST:
    case INT_FTYPE_V4SF_V4SF_PTEST:
    case INT_FTYPE_V2DI_V2DI_PTEST:
    case INT_FTYPE_V2DF_V2DF_PTEST:
      return ix86_expand_sse_ptest (d, exp, target);
    /* Unary builtins.  */
    case FLOAT128_FTYPE_FLOAT128:
    case FLOAT_FTYPE_FLOAT:
    case INT_FTYPE_INT:
    case UINT64_FTYPE_INT:
    case UINT16_FTYPE_UINT16:
    case INT64_FTYPE_INT64:
    case INT64_FTYPE_V4SF:
    case INT64_FTYPE_V2DF:
    case INT_FTYPE_V16QI:
    case INT_FTYPE_V8QI:
    case INT_FTYPE_V8SF:
    case INT_FTYPE_V4DF:
    case INT_FTYPE_V4SF:
    case INT_FTYPE_V2DF:
    case INT_FTYPE_V32QI:
    case V16QI_FTYPE_V16QI:
    case V8SI_FTYPE_V8SF:
    case V8SI_FTYPE_V4SI:
    case V8HI_FTYPE_V8HI:
    case V8HI_FTYPE_V16QI:
    case V8QI_FTYPE_V8QI:
    case V8SF_FTYPE_V8SF:
    case V8SF_FTYPE_V8SI:
    case V8SF_FTYPE_V4SF:
    case V8SF_FTYPE_V8HI:
    case V4SI_FTYPE_V4SI:
    case V4SI_FTYPE_V16QI:
    case V4SI_FTYPE_V4SF:
    case V4SI_FTYPE_V8SI:
    case V4SI_FTYPE_V8HI:
    case V4SI_FTYPE_V4DF:
    case V4SI_FTYPE_V2DF:
    case V4HI_FTYPE_V4HI:
    case V4DF_FTYPE_V4DF:
    case V4DF_FTYPE_V4SI:
    case V4DF_FTYPE_V4SF:
    case V4DF_FTYPE_V2DF:
    case V4SF_FTYPE_V4SF:
    case V4SF_FTYPE_V4SI:
    case V4SF_FTYPE_V8SF:
    case V4SF_FTYPE_V4DF:
    case V4SF_FTYPE_V8HI:
    case V4SF_FTYPE_V2DF:
    case V2DI_FTYPE_V2DI:
    case V2DI_FTYPE_V16QI:
    case V2DI_FTYPE_V8HI:
    case V2DI_FTYPE_V4SI:
    case V2DF_FTYPE_V2DF:
    case V2DF_FTYPE_V4SI:
    case V2DF_FTYPE_V4DF:
    case V2DF_FTYPE_V4SF:
    case V2DF_FTYPE_V2SI:
    case V2SI_FTYPE_V2SI:
    case V2SI_FTYPE_V4SF:
    case V2SI_FTYPE_V2SF:
    case V2SI_FTYPE_V2DF:
    case V2SF_FTYPE_V2SF:
    case V2SF_FTYPE_V2SI:
    case V32QI_FTYPE_V32QI:
    case V32QI_FTYPE_V16QI:
    case V16HI_FTYPE_V16HI:
    case V16HI_FTYPE_V8HI:
    case V8SI_FTYPE_V8SI:
    case V16HI_FTYPE_V16QI:
    case V8SI_FTYPE_V16QI:
    case V4DI_FTYPE_V16QI:
    case V8SI_FTYPE_V8HI:
    case V4DI_FTYPE_V8HI:
    case V4DI_FTYPE_V4SI:
    case V4DI_FTYPE_V2DI:
    case HI_FTYPE_V16QI:
    case SI_FTYPE_V32QI:
    case DI_FTYPE_V64QI:
    case V16QI_FTYPE_HI:
    case V32QI_FTYPE_SI:
    case V64QI_FTYPE_DI:
    case V8HI_FTYPE_QI:
    case V16HI_FTYPE_HI:
    case V32HI_FTYPE_SI:
    case V4SI_FTYPE_QI:
    case V8SI_FTYPE_QI:
    case V4SI_FTYPE_HI:
    case V8SI_FTYPE_HI:
    case QI_FTYPE_V8HI:
    case HI_FTYPE_V16HI:
    case SI_FTYPE_V32HI:
    case QI_FTYPE_V4SI:
    case QI_FTYPE_V8SI:
    case HI_FTYPE_V16SI:
    case QI_FTYPE_V2DI:
    case QI_FTYPE_V4DI:
    case QI_FTYPE_V8DI:
    case UINT_FTYPE_V2DF:
    case UINT_FTYPE_V4SF:
    case UINT64_FTYPE_V2DF:
    case UINT64_FTYPE_V4SF:
    case V16QI_FTYPE_V8DI:
    case V16HI_FTYPE_V16SI:
    case V16SI_FTYPE_HI:
    case V2DI_FTYPE_QI:
    case V4DI_FTYPE_QI:
    case V16SI_FTYPE_V16SI:
    case V16SI_FTYPE_INT:
    case V16SF_FTYPE_FLOAT:
    case V16SF_FTYPE_V8SF:
    case V16SI_FTYPE_V8SI:
    case V16SF_FTYPE_V4SF:
    case V16SI_FTYPE_V4SI:
    case V16SF_FTYPE_V16SF:
    case V8HI_FTYPE_V8DI:
    case V8UHI_FTYPE_V8UHI:
    case V8SI_FTYPE_V8DI:
    case V8SF_FTYPE_V8DF:
    case V8DI_FTYPE_QI:
    case V8DI_FTYPE_INT64:
    case V8DI_FTYPE_V4DI:
    case V8DI_FTYPE_V8DI:
    case V8DF_FTYPE_DOUBLE:
    case V8DF_FTYPE_V4DF:
    case V8DF_FTYPE_V2DF:
    case V8DF_FTYPE_V8DF:
    case V8DF_FTYPE_V8SI:
    case V4SF_FTYPE_V4SF_VEC_MERGE:
    case V2DF_FTYPE_V2DF_VEC_MERGE:
      return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
    /* Binary builtins; SSE compares among them are dispatched below
       when COMPARISON is set.  */
    case FLOAT128_FTYPE_FLOAT128_FLOAT128:
    case V16QI_FTYPE_V16QI_V16QI:
    case V16QI_FTYPE_V8HI_V8HI:
    case V16SI_FTYPE_V16SI_V16SI:
    case V16SF_FTYPE_V16SF_V16SF:
    case V16SF_FTYPE_V16SF_V16SI:
    case V8QI_FTYPE_V8QI_V8QI:
    case V8QI_FTYPE_V4HI_V4HI:
    case V8HI_FTYPE_V8HI_V8HI:
    case V8HI_FTYPE_V16QI_V16QI:
    case V8HI_FTYPE_V4SI_V4SI:
    case V8SF_FTYPE_V8SF_V8SF:
    case V8SF_FTYPE_V8SF_V8SI:
    case V8DI_FTYPE_V8DI_V8DI:
    case V8DF_FTYPE_V8DF_V8DF:
    case V8DF_FTYPE_V8DF_V8DI:
    case V4SI_FTYPE_V4SI_V4SI:
    case V4SI_FTYPE_V8HI_V8HI:
    case V4SI_FTYPE_V4SF_V4SF:
    case V4SI_FTYPE_V2DF_V2DF:
    case V4HI_FTYPE_V4HI_V4HI:
    case V4HI_FTYPE_V8QI_V8QI:
    case V4HI_FTYPE_V2SI_V2SI:
    case V4DF_FTYPE_V4DF_V4DF:
    case V4DF_FTYPE_V4DF_V4DI:
    case V4SF_FTYPE_V4SF_V4SF:
    case V4SF_FTYPE_V4SF_V4SI:
    case V4SF_FTYPE_V4SF_V2SI:
    case V4SF_FTYPE_V4SF_V2DF:
    case V4SF_FTYPE_V4SF_UINT:
    case V4SF_FTYPE_V4SF_UINT64:
    case V4SF_FTYPE_V4SF_DI:
    case V4SF_FTYPE_V4SF_SI:
    case V2DI_FTYPE_V2DI_V2DI:
    case V2DI_FTYPE_V16QI_V16QI:
    case V2DI_FTYPE_V4SI_V4SI:
    case V2UDI_FTYPE_V4USI_V4USI:
    case V2DI_FTYPE_V2DI_V16QI:
    case V2DI_FTYPE_V2DF_V2DF:
    case V2SI_FTYPE_V2SI_V2SI:
    case V2SI_FTYPE_V4HI_V4HI:
    case V2SI_FTYPE_V2SF_V2SF:
    case V2DF_FTYPE_V2DF_V2DF:
    case V2DF_FTYPE_V2DF_V4SF:
    case V2DF_FTYPE_V2DF_V2DI:
    case V2DF_FTYPE_V2DF_DI:
    case V2DF_FTYPE_V2DF_SI:
    case V2DF_FTYPE_V2DF_UINT:
    case V2DF_FTYPE_V2DF_UINT64:
    case V2SF_FTYPE_V2SF_V2SF:
    case V1DI_FTYPE_V1DI_V1DI:
    case V1DI_FTYPE_V8QI_V8QI:
    case V1DI_FTYPE_V2SI_V2SI:
    case V32QI_FTYPE_V16HI_V16HI:
    case V16HI_FTYPE_V8SI_V8SI:
    case V32QI_FTYPE_V32QI_V32QI:
    case V16HI_FTYPE_V32QI_V32QI:
    case V16HI_FTYPE_V16HI_V16HI:
    case V8SI_FTYPE_V4DF_V4DF:
    case V8SI_FTYPE_V8SI_V8SI:
    case V8SI_FTYPE_V16HI_V16HI:
    case V4DI_FTYPE_V4DI_V4DI:
    case V4DI_FTYPE_V8SI_V8SI:
    case V4UDI_FTYPE_V8USI_V8USI:
    case QI_FTYPE_V8DI_V8DI:
    case V8DI_FTYPE_V64QI_V64QI:
    case HI_FTYPE_V16SI_V16SI:
      if (comparison == UNKNOWN)
	return ix86_expand_binop_builtin (icode, exp, target);
    case V4SF_FTYPE_V4SF_V4SF_SWAP:
    case V2DF_FTYPE_V2DF_V2DF_SWAP:
      gcc_assert (comparison != UNKNOWN);
    /* SIMD shifts: the last argument is a count, forced into a
       register below when it is not a valid immediate.  */
    case V16HI_FTYPE_V16HI_V8HI_COUNT:
    case V16HI_FTYPE_V16HI_SI_COUNT:
    case V8SI_FTYPE_V8SI_V4SI_COUNT:
    case V8SI_FTYPE_V8SI_SI_COUNT:
    case V4DI_FTYPE_V4DI_V2DI_COUNT:
    case V4DI_FTYPE_V4DI_INT_COUNT:
    case V8HI_FTYPE_V8HI_V8HI_COUNT:
    case V8HI_FTYPE_V8HI_SI_COUNT:
    case V4SI_FTYPE_V4SI_V4SI_COUNT:
    case V4SI_FTYPE_V4SI_SI_COUNT:
    case V4HI_FTYPE_V4HI_V4HI_COUNT:
    case V4HI_FTYPE_V4HI_SI_COUNT:
    case V2DI_FTYPE_V2DI_V2DI_COUNT:
    case V2DI_FTYPE_V2DI_SI_COUNT:
    case V2SI_FTYPE_V2SI_V2SI_COUNT:
    case V2SI_FTYPE_V2SI_SI_COUNT:
    case V1DI_FTYPE_V1DI_V1DI_COUNT:
    case V1DI_FTYPE_V1DI_SI_COUNT:
      last_arg_count = true;
    case UINT64_FTYPE_UINT64_UINT64:
    case UINT_FTYPE_UINT_UINT:
    case UINT_FTYPE_UINT_USHORT:
    case UINT_FTYPE_UINT_UCHAR:
    case UINT16_FTYPE_UINT16_INT:
    case UINT8_FTYPE_UINT8_INT:
    case HI_FTYPE_HI_HI:
    case SI_FTYPE_SI_SI:
    case DI_FTYPE_DI_DI:
    case V16SI_FTYPE_V8DF_V8DF:
    case V2DI_FTYPE_V2DI_INT_CONVERT:
      nargs_constant = 1;
    case V4DI_FTYPE_V4DI_INT_CONVERT:
      nargs_constant = 1;
    case V8DI_FTYPE_V8DI_INT_CONVERT:
      nargs_constant = 1;
    /* Binary builtins whose last operand must be an immediate.  */
    case V8HI_FTYPE_V8HI_INT:
    case V8HI_FTYPE_V8SF_INT:
    case V16HI_FTYPE_V16SF_INT:
    case V8HI_FTYPE_V4SF_INT:
    case V8SF_FTYPE_V8SF_INT:
    case V4SF_FTYPE_V16SF_INT:
    case V16SF_FTYPE_V16SF_INT:
    case V4SI_FTYPE_V4SI_INT:
    case V4SI_FTYPE_V8SI_INT:
    case V4HI_FTYPE_V4HI_INT:
    case V4DF_FTYPE_V4DF_INT:
    case V4DF_FTYPE_V8DF_INT:
    case V4SF_FTYPE_V4SF_INT:
    case V4SF_FTYPE_V8SF_INT:
    case V2DI_FTYPE_V2DI_INT:
    case V2DF_FTYPE_V2DF_INT:
    case V2DF_FTYPE_V4DF_INT:
    case V16HI_FTYPE_V16HI_INT:
    case V8SI_FTYPE_V8SI_INT:
    case V16SI_FTYPE_V16SI_INT:
    case V4SI_FTYPE_V16SI_INT:
    case V4DI_FTYPE_V4DI_INT:
    case V2DI_FTYPE_V4DI_INT:
    case V4DI_FTYPE_V8DI_INT:
    case HI_FTYPE_HI_INT:
    case QI_FTYPE_V4SF_INT:
    case QI_FTYPE_V2DF_INT:
      nargs_constant = 1;
    /* Ternary builtins.  */
    case V16QI_FTYPE_V16QI_V16QI_V16QI:
    case V8SF_FTYPE_V8SF_V8SF_V8SF:
    case V4DF_FTYPE_V4DF_V4DF_V4DF:
    case V4SF_FTYPE_V4SF_V4SF_V4SF:
    case V2DF_FTYPE_V2DF_V2DF_V2DF:
    case V32QI_FTYPE_V32QI_V32QI_V32QI:
    case HI_FTYPE_V16SI_V16SI_HI:
    case QI_FTYPE_V8DI_V8DI_QI:
    case V16HI_FTYPE_V16SI_V16HI_HI:
    case V16QI_FTYPE_V16SI_V16QI_HI:
    case V16QI_FTYPE_V8DI_V16QI_QI:
    case V16SF_FTYPE_V16SF_V16SF_HI:
    case V16SF_FTYPE_V16SF_V16SF_V16SF:
    case V16SF_FTYPE_V16SF_V16SI_V16SF:
    case V16SF_FTYPE_V16SI_V16SF_HI:
    case V16SF_FTYPE_V16SI_V16SF_V16SF:
    case V16SF_FTYPE_V4SF_V16SF_HI:
    case V16SI_FTYPE_SI_V16SI_HI:
    case V16SI_FTYPE_V16HI_V16SI_HI:
    case V16SI_FTYPE_V16QI_V16SI_HI:
    case V16SI_FTYPE_V16SF_V16SI_HI:
    case V8SF_FTYPE_V4SF_V8SF_QI:
    case V4DF_FTYPE_V2DF_V4DF_QI:
    case V8SI_FTYPE_V4SI_V8SI_QI:
    case V8SI_FTYPE_SI_V8SI_QI:
    case V4SI_FTYPE_V4SI_V4SI_QI:
    case V4SI_FTYPE_SI_V4SI_QI:
    case V4DI_FTYPE_V2DI_V4DI_QI:
    case V4DI_FTYPE_DI_V4DI_QI:
    case V2DI_FTYPE_V2DI_V2DI_QI:
    case V2DI_FTYPE_DI_V2DI_QI:
    case V64QI_FTYPE_V64QI_V64QI_DI:
    case V64QI_FTYPE_V16QI_V64QI_DI:
    case V64QI_FTYPE_QI_V64QI_DI:
    case V32QI_FTYPE_V32QI_V32QI_SI:
    case V32QI_FTYPE_V16QI_V32QI_SI:
    case V32QI_FTYPE_QI_V32QI_SI:
    case V16QI_FTYPE_V16QI_V16QI_HI:
    case V16QI_FTYPE_QI_V16QI_HI:
    case V32HI_FTYPE_V8HI_V32HI_SI:
    case V32HI_FTYPE_HI_V32HI_SI:
    case V16HI_FTYPE_V8HI_V16HI_HI:
    case V16HI_FTYPE_HI_V16HI_HI:
    case V8HI_FTYPE_V8HI_V8HI_QI:
    case V8HI_FTYPE_HI_V8HI_QI:
    case V8SF_FTYPE_V8HI_V8SF_QI:
    case V4SF_FTYPE_V8HI_V4SF_QI:
    case V8SI_FTYPE_V8SF_V8SI_QI:
    case V4SI_FTYPE_V4SF_V4SI_QI:
    case V8DI_FTYPE_V8SF_V8DI_QI:
    case V4DI_FTYPE_V4SF_V4DI_QI:
    case V2DI_FTYPE_V4SF_V2DI_QI:
    case V8SF_FTYPE_V8DI_V8SF_QI:
    case V4SF_FTYPE_V4DI_V4SF_QI:
    case V4SF_FTYPE_V2DI_V4SF_QI:
    case V8DF_FTYPE_V8DI_V8DF_QI:
    case V4DF_FTYPE_V4DI_V4DF_QI:
    case V2DF_FTYPE_V2DI_V2DF_QI:
    case V16QI_FTYPE_V8HI_V16QI_QI:
    case V16QI_FTYPE_V16HI_V16QI_HI:
    case V16QI_FTYPE_V4SI_V16QI_QI:
    case V16QI_FTYPE_V8SI_V16QI_QI:
    case V8HI_FTYPE_V4SI_V8HI_QI:
    case V8HI_FTYPE_V8SI_V8HI_QI:
    case V16QI_FTYPE_V2DI_V16QI_QI:
    case V16QI_FTYPE_V4DI_V16QI_QI:
    case V8HI_FTYPE_V2DI_V8HI_QI:
    case V8HI_FTYPE_V4DI_V8HI_QI:
    case V4SI_FTYPE_V2DI_V4SI_QI:
    case V4SI_FTYPE_V4DI_V4SI_QI:
    case V32QI_FTYPE_V32HI_V32QI_SI:
    case HI_FTYPE_V16QI_V16QI_HI:
    case SI_FTYPE_V32QI_V32QI_SI:
    case DI_FTYPE_V64QI_V64QI_DI:
    case QI_FTYPE_V8HI_V8HI_QI:
    case HI_FTYPE_V16HI_V16HI_HI:
    case SI_FTYPE_V32HI_V32HI_SI:
    case QI_FTYPE_V4SI_V4SI_QI:
    case QI_FTYPE_V8SI_V8SI_QI:
    case QI_FTYPE_V2DI_V2DI_QI:
    case QI_FTYPE_V4DI_V4DI_QI:
    case V4SF_FTYPE_V2DF_V4SF_QI:
    case V4SF_FTYPE_V4DF_V4SF_QI:
    case V16SI_FTYPE_V16SI_V16SI_HI:
    case V16SI_FTYPE_V16SI_V16SI_V16SI:
    case V16SI_FTYPE_V4SI_V16SI_HI:
    case V2DI_FTYPE_V2DI_V2DI_V2DI:
    case V2DI_FTYPE_V4SI_V2DI_QI:
    case V2DI_FTYPE_V8HI_V2DI_QI:
    case V2DI_FTYPE_V16QI_V2DI_QI:
    case V4DI_FTYPE_V4DI_V4DI_QI:
    case V4DI_FTYPE_V4SI_V4DI_QI:
    case V4DI_FTYPE_V8HI_V4DI_QI:
    case V4DI_FTYPE_V16QI_V4DI_QI:
    case V8DI_FTYPE_V8DF_V8DI_QI:
    case V4DI_FTYPE_V4DF_V4DI_QI:
    case V2DI_FTYPE_V2DF_V2DI_QI:
    case V4SI_FTYPE_V4DF_V4SI_QI:
    case V4SI_FTYPE_V2DF_V4SI_QI:
    case V4SI_FTYPE_V8HI_V4SI_QI:
    case V4SI_FTYPE_V16QI_V4SI_QI:
    case V8SI_FTYPE_V8SI_V8SI_V8SI:
    case V4DI_FTYPE_V4DI_V4DI_V4DI:
    case V8DF_FTYPE_V2DF_V8DF_QI:
    case V8DF_FTYPE_V4DF_V8DF_QI:
    case V8DF_FTYPE_V8DF_V8DF_QI:
    case V8DF_FTYPE_V8DF_V8DF_V8DF:
    case V8SF_FTYPE_V8SF_V8SF_QI:
    case V8SF_FTYPE_V8SI_V8SF_QI:
    case V4DF_FTYPE_V4DF_V4DF_QI:
    case V4SF_FTYPE_V4SF_V4SF_QI:
    case V2DF_FTYPE_V2DF_V2DF_QI:
    case V2DF_FTYPE_V4SF_V2DF_QI:
    case V2DF_FTYPE_V4SI_V2DF_QI:
    case V4SF_FTYPE_V4SI_V4SF_QI:
    case V4DF_FTYPE_V4SF_V4DF_QI:
    case V4DF_FTYPE_V4SI_V4DF_QI:
    case V8SI_FTYPE_V8SI_V8SI_QI:
    case V8SI_FTYPE_V8HI_V8SI_QI:
    case V8SI_FTYPE_V16QI_V8SI_QI:
    case V8DF_FTYPE_V8DF_V8DI_V8DF:
    case V8DF_FTYPE_V8DI_V8DF_V8DF:
    case V8DF_FTYPE_V8SF_V8DF_QI:
    case V8DF_FTYPE_V8SI_V8DF_QI:
    case V8DI_FTYPE_DI_V8DI_QI:
    case V16SF_FTYPE_V8SF_V16SF_HI:
    case V16SI_FTYPE_V8SI_V16SI_HI:
    case V16HI_FTYPE_V16HI_V16HI_HI:
    case V8HI_FTYPE_V16QI_V8HI_QI:
    case V16HI_FTYPE_V16QI_V16HI_HI:
    case V32HI_FTYPE_V32HI_V32HI_SI:
    case V32HI_FTYPE_V32QI_V32HI_SI:
    case V8DI_FTYPE_V16QI_V8DI_QI:
    case V8DI_FTYPE_V2DI_V8DI_QI:
    case V8DI_FTYPE_V4DI_V8DI_QI:
    case V8DI_FTYPE_V8DI_V8DI_QI:
    case V8DI_FTYPE_V8DI_V8DI_V8DI:
    case V8DI_FTYPE_V8HI_V8DI_QI:
    case V8DI_FTYPE_V8SI_V8DI_QI:
    case V8HI_FTYPE_V8DI_V8HI_QI:
    case V8SF_FTYPE_V8DF_V8SF_QI:
    case V8SI_FTYPE_V8DF_V8SI_QI:
    case V8SI_FTYPE_V8DI_V8SI_QI:
    case V4SI_FTYPE_V4SI_V4SI_V4SI:
    /* Ternary builtins whose last operand must be an immediate.  */
    case V32QI_FTYPE_V32QI_V32QI_INT:
    case V16HI_FTYPE_V16HI_V16HI_INT:
    case V16QI_FTYPE_V16QI_V16QI_INT:
    case V4DI_FTYPE_V4DI_V4DI_INT:
    case V8HI_FTYPE_V8HI_V8HI_INT:
    case V8SI_FTYPE_V8SI_V8SI_INT:
    case V8SI_FTYPE_V8SI_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_INT:
    case V8SF_FTYPE_V8SF_V4SF_INT:
    case V4SI_FTYPE_V4SI_V4SI_INT:
    case V4DF_FTYPE_V4DF_V4DF_INT:
    case V16SF_FTYPE_V16SF_V16SF_INT:
    case V16SF_FTYPE_V16SF_V4SF_INT:
    case V16SI_FTYPE_V16SI_V4SI_INT:
    case V4DF_FTYPE_V4DF_V2DF_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT:
    case V2DI_FTYPE_V2DI_V2DI_INT:
    case V4DI_FTYPE_V4DI_V2DI_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT:
    case QI_FTYPE_V8DI_V8DI_INT:
    case QI_FTYPE_V8DF_V8DF_INT:
    case QI_FTYPE_V2DF_V2DF_INT:
    case QI_FTYPE_V4SF_V4SF_INT:
    case HI_FTYPE_V16SI_V16SI_INT:
    case HI_FTYPE_V16SF_V16SF_INT:
      nargs_constant = 1;
    case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
      nargs_constant = 1;
    case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
      nargs_constant = 1;
    case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
      nargs_constant = 1;
    case V2DI_FTYPE_V2DI_UINT_UINT:
      nargs_constant = 2;
    case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
      nargs_constant = 1;
    case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
      nargs_constant = 1;
    case QI_FTYPE_V8DF_INT_QI:
    case QI_FTYPE_V4DF_INT_QI:
    case QI_FTYPE_V2DF_INT_QI:
    case HI_FTYPE_V16SF_INT_HI:
    case QI_FTYPE_V8SF_INT_QI:
    case QI_FTYPE_V4SF_INT_QI:
      nargs_constant = 1;
    case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
      nargs_constant = 1;
    case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
      nargs_constant = 1;
    /* Quaternary (masked) builtins.  */
    case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
    case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
    case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
    case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
    case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
    case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
    case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
    case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
    case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
    case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
    case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
    case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
    case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
    case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
    case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
    case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
    case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
    case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
    case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
    case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
    case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
    case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
    case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
    case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
    case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
    case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
    case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
    case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
    case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
    case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
    case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
    case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
    case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
    case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
    case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
    case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
    case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
    case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
    case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
    case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
    case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
    case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
    case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
    case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
    case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
    case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
    case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
    case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
    case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
    case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
    case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
    case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
    case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
    case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
    case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
    case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
      nargs_constant = 1;
    case QI_FTYPE_V4DI_V4DI_INT_QI:
    case QI_FTYPE_V8SI_V8SI_INT_QI:
    case QI_FTYPE_V4DF_V4DF_INT_QI:
    case QI_FTYPE_V8SF_V8SF_INT_QI:
    case QI_FTYPE_V2DI_V2DI_INT_QI:
    case QI_FTYPE_V4SI_V4SI_INT_QI:
    case QI_FTYPE_V2DF_V2DF_INT_QI:
    case QI_FTYPE_V4SF_V4SF_INT_QI:
    case DI_FTYPE_V64QI_V64QI_INT_DI:
    case SI_FTYPE_V32QI_V32QI_INT_SI:
    case HI_FTYPE_V16QI_V16QI_INT_HI:
    case SI_FTYPE_V32HI_V32HI_INT_SI:
    case HI_FTYPE_V16HI_V16HI_INT_HI:
    case QI_FTYPE_V8HI_V8HI_INT_QI:
      nargs_constant = 1;
    case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
      nargs_constant = 2;
    case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
    case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
    case QI_FTYPE_V8DI_V8DI_INT_QI:
    case HI_FTYPE_V16SI_V16SI_INT_HI:
    case QI_FTYPE_V8DF_V8DF_INT_QI:
    case HI_FTYPE_V16SF_V16SF_INT_HI:
      nargs_constant = 1;
    case V8SF_FTYPE_V8SF_INT_V8SF_QI:
    case V4SF_FTYPE_V4SF_INT_V4SF_QI:
    case V2DF_FTYPE_V4DF_INT_V2DF_QI:
    case V2DI_FTYPE_V4DI_INT_V2DI_QI:
    case V8SF_FTYPE_V16SF_INT_V8SF_QI:
    case V8SI_FTYPE_V16SI_INT_V8SI_QI:
    case V2DF_FTYPE_V8DF_INT_V2DF_QI:
    case V2DI_FTYPE_V8DI_INT_V2DI_QI:
    case V4SF_FTYPE_V8SF_INT_V4SF_QI:
    case V4SI_FTYPE_V8SI_INT_V4SI_QI:
    case V8HI_FTYPE_V8SF_INT_V8HI_QI:
    case V8HI_FTYPE_V4SF_INT_V8HI_QI:
    case V32HI_FTYPE_V32HI_INT_V32HI_SI:
    case V16HI_FTYPE_V16HI_INT_V16HI_HI:
    case V8HI_FTYPE_V8HI_INT_V8HI_QI:
    case V4DI_FTYPE_V4DI_INT_V4DI_QI:
    case V2DI_FTYPE_V2DI_INT_V2DI_QI:
    case V8SI_FTYPE_V8SI_INT_V8SI_QI:
    case V4SI_FTYPE_V4SI_INT_V4SI_QI:
    case V4DF_FTYPE_V4DF_INT_V4DF_QI:
    case V2DF_FTYPE_V2DF_INT_V2DF_QI:
    case V8DF_FTYPE_V8DF_INT_V8DF_QI:
    case V16SF_FTYPE_V16SF_INT_V16SF_HI:
    case V16HI_FTYPE_V16SF_INT_V16HI_HI:
    case V16SI_FTYPE_V16SI_INT_V16SI_HI:
    case V4SI_FTYPE_V16SI_INT_V4SI_QI:
    case V4DI_FTYPE_V8DI_INT_V4DI_QI:
    case V4DF_FTYPE_V8DF_INT_V4DF_QI:
    case V4SF_FTYPE_V16SF_INT_V4SF_QI:
    case V8DI_FTYPE_V8DI_INT_V8DI_QI:
      nargs_constant = 1;
    case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
    case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
    case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
    case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
    case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
    case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
    case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
    case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
    case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
    case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
    case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
    case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
    case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
    case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
    case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
    case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
    case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
    case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
    case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
    case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
    case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
    case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
    case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
    case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
    case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
    case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
    case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
      nargs_constant = 1;
    case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
    case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
    case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
    case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
    case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
    case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
    case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
    case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
    case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
      nargs_constant = 1;
      gcc_unreachable ();

  gcc_assert (nargs <= ARRAY_SIZE (args));

  /* SSE comparison builtins carry a known comparison code and are
     expanded by a dedicated helper.  */
  if (comparison != UNKNOWN)
      gcc_assert (nargs == 2);
      return ix86_expand_sse_compare (d, exp, target, swap);

  /* When the insn computes in a different mode (rmode) than the
     builtin returns (tmode), compute into REAL_TARGET and expose it
     to the caller as an rmode subreg of it.  */
  if (rmode == VOIDmode || rmode == tmode)
      || GET_MODE (target) != tmode
      || !insn_p->operand[0].predicate (target, tmode))
	target = gen_reg_rtx (tmode);
      real_target = target;
      real_target = gen_reg_rtx (tmode);
      target = simplify_gen_subreg (rmode, real_target, tmode, 0);

  /* Expand each call argument and legitimize it for insn operand
     I + 1 (operand 0 is the result).  */
  for (i = 0; i < nargs; i++)
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      machine_mode mode = insn_p->operand[i + 1].mode;
      bool match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_count && (i + 1) == nargs)
	  /* SIMD shift insns take either an 8-bit immediate or
	     register as count.  But builtin functions take int as
	     count.  If count doesn't match, we put it in register.  */
	    op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
	  if (!insn_p->operand[i + 1].predicate (op, mode))
	    op = copy_to_reg (op);
      else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
	       (!mask_pos && (nargs - i) <= nargs_constant))
	  /* Immediate operand out of range: pick the diagnostic
	     matching the width of this icode's immediate field.  */
	      case CODE_FOR_avx_vinsertf128v4di:
	      case CODE_FOR_avx_vextractf128v4di:
		/* NOTE(review): "an 1-bit immediate" -- grammar; the
		   equivalent message further below reads
		   "a 1-bit immediate".  Consider unifying.  */
		error ("the last argument must be an 1-bit immediate");
	      case CODE_FOR_avx512f_cmpv8di3_mask:
	      case CODE_FOR_avx512f_cmpv16si3_mask:
	      case CODE_FOR_avx512f_ucmpv8di3_mask:
	      case CODE_FOR_avx512f_ucmpv16si3_mask:
	      case CODE_FOR_avx512vl_cmpv4di3_mask:
	      case CODE_FOR_avx512vl_cmpv8si3_mask:
	      case CODE_FOR_avx512vl_ucmpv4di3_mask:
	      case CODE_FOR_avx512vl_ucmpv8si3_mask:
	      case CODE_FOR_avx512vl_cmpv2di3_mask:
	      case CODE_FOR_avx512vl_cmpv4si3_mask:
	      case CODE_FOR_avx512vl_ucmpv2di3_mask:
	      case CODE_FOR_avx512vl_ucmpv4si3_mask:
		error ("the last argument must be a 3-bit immediate");
	      case CODE_FOR_sse4_1_roundsd:
	      case CODE_FOR_sse4_1_roundss:
	      case CODE_FOR_sse4_1_roundpd:
	      case CODE_FOR_sse4_1_roundps:
	      case CODE_FOR_avx_roundpd256:
	      case CODE_FOR_avx_roundps256:
	      case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
	      case CODE_FOR_sse4_1_roundps_sfix:
	      case CODE_FOR_avx_roundpd_vec_pack_sfix256:
	      case CODE_FOR_avx_roundps_sfix256:
	      case CODE_FOR_sse4_1_blendps:
	      case CODE_FOR_avx_blendpd256:
	      case CODE_FOR_avx_vpermilv4df:
	      case CODE_FOR_avx_vpermilv4df_mask:
	      case CODE_FOR_avx512f_getmantv8df_mask:
	      case CODE_FOR_avx512f_getmantv16sf_mask:
	      case CODE_FOR_avx512vl_getmantv8sf_mask:
	      case CODE_FOR_avx512vl_getmantv4df_mask:
	      case CODE_FOR_avx512vl_getmantv4sf_mask:
	      case CODE_FOR_avx512vl_getmantv2df_mask:
	      case CODE_FOR_avx512dq_rangepv8df_mask_round:
	      case CODE_FOR_avx512dq_rangepv16sf_mask_round:
	      case CODE_FOR_avx512dq_rangepv4df_mask:
	      case CODE_FOR_avx512dq_rangepv8sf_mask:
	      case CODE_FOR_avx512dq_rangepv2df_mask:
	      case CODE_FOR_avx512dq_rangepv4sf_mask:
	      case CODE_FOR_avx_shufpd256_mask:
		error ("the last argument must be a 4-bit immediate");
	      case CODE_FOR_sha1rnds4:
	      case CODE_FOR_sse4_1_blendpd:
	      case CODE_FOR_avx_vpermilv2df:
	      case CODE_FOR_avx_vpermilv2df_mask:
	      case CODE_FOR_xop_vpermil2v2df3:
	      case CODE_FOR_xop_vpermil2v4sf3:
	      case CODE_FOR_xop_vpermil2v4df3:
	      case CODE_FOR_xop_vpermil2v8sf3:
	      case CODE_FOR_avx512f_vinsertf32x4_mask:
	      case CODE_FOR_avx512f_vinserti32x4_mask:
	      case CODE_FOR_avx512f_vextractf32x4_mask:
	      case CODE_FOR_avx512f_vextracti32x4_mask:
	      case CODE_FOR_sse2_shufpd:
	      case CODE_FOR_sse2_shufpd_mask:
	      case CODE_FOR_avx512dq_shuf_f64x2_mask:
	      case CODE_FOR_avx512dq_shuf_i64x2_mask:
	      case CODE_FOR_avx512vl_shuf_i32x4_mask:
	      case CODE_FOR_avx512vl_shuf_f32x4_mask:
		error ("the last argument must be a 2-bit immediate");
	      case CODE_FOR_avx_vextractf128v4df:
	      case CODE_FOR_avx_vextractf128v8sf:
	      case CODE_FOR_avx_vextractf128v8si:
	      case CODE_FOR_avx_vinsertf128v4df:
	      case CODE_FOR_avx_vinsertf128v8sf:
	      case CODE_FOR_avx_vinsertf128v8si:
	      case CODE_FOR_avx512f_vinsertf64x4_mask:
	      case CODE_FOR_avx512f_vinserti64x4_mask:
	      case CODE_FOR_avx512f_vextractf64x4_mask:
	      case CODE_FOR_avx512f_vextracti64x4_mask:
	      case CODE_FOR_avx512dq_vinsertf32x8_mask:
	      case CODE_FOR_avx512dq_vinserti32x8_mask:
	      case CODE_FOR_avx512vl_vinsertv4df:
	      case CODE_FOR_avx512vl_vinsertv4di:
	      case CODE_FOR_avx512vl_vinsertv8sf:
	      case CODE_FOR_avx512vl_vinsertv8si:
		error ("the last argument must be a 1-bit immediate");
	      case CODE_FOR_avx_vmcmpv2df3:
	      case CODE_FOR_avx_vmcmpv4sf3:
	      case CODE_FOR_avx_cmpv2df3:
	      case CODE_FOR_avx_cmpv4sf3:
	      case CODE_FOR_avx_cmpv4df3:
	      case CODE_FOR_avx_cmpv8sf3:
	      case CODE_FOR_avx512f_cmpv8df3_mask:
	      case CODE_FOR_avx512f_cmpv16sf3_mask:
	      case CODE_FOR_avx512f_vmcmpv2df3_mask:
	      case CODE_FOR_avx512f_vmcmpv4sf3_mask:
		error ("the last argument must be a 5-bit immediate");
		/* Generic fallback: diagnostic keyed on how many
		   trailing constant operands the prototype takes.  */
		switch (nargs_constant)
		    if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
			(!mask_pos && (nargs - i) == nargs_constant))
		      error ("the next to last argument must be an 8-bit immediate");
		    error ("the last argument must be an 8-bit immediate");
		    gcc_unreachable ();
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to
	     be generated.  */
	  if (memory_operand (op, mode))
	  if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	      if (optimize || !match || num_memory > 1)
		op = copy_to_mode_reg (mode, op);
	      op = copy_to_reg (op);
	      op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);

      args[i].mode = mode;

  /* Emit the insn with the arity matching the collected operands.  */
      pat = GEN_FCN (icode) (real_target, args[0].op);
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op);
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op, args[3].op);
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op, args[3].op, args[4].op);
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op, args[3].op, args[4].op,
      gcc_unreachable ();
/* Transform a pattern that carries an
   (unspec [C] UNSPEC_EMBEDDED_ROUNDING) marker into the equivalent
   pattern without the marker:
     (parallel [(set A B)
		(unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
       -> (set A B)
     (parallel [ A B ...
		(unspec [C] UNSPEC_EMBEDDED_ROUNDING)
		... ])
       -> (parallel [ A B ... ]) */
ix86_erase_embedded_rounding (rtx pat)
  /* PAT may be a whole insn; operate on its pattern.  */
  if (GET_CODE (pat) == INSN)
    pat = PATTERN (pat);

  gcc_assert (GET_CODE (pat) == PARALLEL);
  /* Two-element case: a SET plus the rounding unspec; the SET alone
     is the result.  */
  if (XVECLEN (pat, 0) == 2)
      rtx p0 = XVECEXP (pat, 0, 0);
      rtx p1 = XVECEXP (pat, 0, 1);
      gcc_assert (GET_CODE (p0) == SET
		  && GET_CODE (p1) == UNSPEC
		  && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
      /* General case: copy every element except the embedded-rounding
	 unspec; J counts the survivors placed in RES.  */
      rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
      for (; i < XVECLEN (pat, 0); ++i)
	  rtx elem = XVECEXP (pat, 0, i);
	  if (GET_CODE (elem) != UNSPEC
	      || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
      /* No more than 1 occurrence was removed.  */
      gcc_assert (j >= XVECLEN (pat, 0) - 1);
      return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
37467 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
/* Expands call EXP of builtin D into a [u]comi instruction with an
   embedded-rounding (SAE) operand.  Returns an SImode pseudo holding
   the zero-extended QImode comparison result.  */
37470 ix86_expand_sse_comi_round (const struct builtin_description *d,
37471 tree exp, rtx target)
/* Arguments: two scalar vector operands, a comparison-predicate
   immediate, and a rounding immediate.  */
37474 tree arg0 = CALL_EXPR_ARG (exp, 0);
37475 tree arg1 = CALL_EXPR_ARG (exp, 1);
37476 tree arg2 = CALL_EXPR_ARG (exp, 2);
37477 tree arg3 = CALL_EXPR_ARG (exp, 3);
37478 rtx op0 = expand_normal (arg0);
37479 rtx op1 = expand_normal (arg1);
37480 rtx op2 = expand_normal (arg2);
37481 rtx op3 = expand_normal (arg3);
37482 enum insn_code icode = d->icode;
37483 const struct insn_data_d *insn_p = &insn_data[icode];
37484 machine_mode mode0 = insn_p->operand[0].mode;
37485 machine_mode mode1 = insn_p->operand[1].mode;
37486 enum rtx_code comparison = UNEQ;
37487 bool need_ucomi = false;
37489 /* See avxintrin.h for values. */
/* Table mapping the _CMP_* predicate immediate (index) to the RTL
   comparison code to test after the [u]comi.  */
37490 enum rtx_code comi_comparisons[32] =
37492 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37493 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37494 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
/* Parallel table: whether the predicate requires the non-signalling
   (ucomi) variant of the instruction.  */
37496 bool need_ucomi_values[32] =
37498 true, false, false, true, true, false, false, true,
37499 true, false, false, true, true, false, false, true,
37500 false, true, true, false, false, true, true, false,
37501 false, true, true, false, false, true, true, false
/* Validate the predicate and rounding immediates before using them
   as table indexes / instruction operands.  */
37504 if (!CONST_INT_P (op2))
37506 error ("the third argument must be comparison constant");
37509 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37511 error ("incorrect comparison mode");
37515 if (!insn_p->operand[2].predicate (op3, SImode))
37517 error ("incorrect rounding operand");
37521 comparison = comi_comparisons[INTVAL (op2)];
37522 need_ucomi = need_ucomi_values[INTVAL (op2)];
37524 if (VECTOR_MODE_P (mode0))
37525 op0 = safe_vector_operand (op0, mode0);
37526 if (VECTOR_MODE_P (mode1))
37527 op1 = safe_vector_operand (op1, mode1);
/* Result is built in a zeroed SImode pseudo whose low QImode part
   receives the comparison outcome via STRICT_LOW_PART below.  */
37529 target = gen_reg_rtx (SImode);
37530 emit_move_insn (target, const0_rtx);
37531 target = gen_rtx_SUBREG (QImode, target, 0);
37533 if ((optimize && !register_operand (op0, mode0))
37534 || !insn_p->operand[0].predicate (op0, mode0))
37535 op0 = copy_to_mode_reg (mode0, op0);
37536 if ((optimize && !register_operand (op1, mode1))
37537 || !insn_p->operand[1].predicate (op1, mode1))
37538 op1 = copy_to_mode_reg (mode1, op1);
/* Switch to the ucomi pattern when the predicate demands it.  */
37541 icode = icode == CODE_FOR_sse_comi_round
37542 ? CODE_FOR_sse_ucomi_round
37543 : CODE_FOR_sse2_ucomi_round;
37545 pat = GEN_FCN (icode) (op0, op1, op3);
37549 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
37550 if (INTVAL (op3) == NO_ROUND)
37552 pat = ix86_erase_embedded_rounding (pat);
37556 set_dst = SET_DEST (pat);
/* Pattern is a PARALLEL; the SET is its first element.  */
37560 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37561 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
37565 emit_insn (gen_rtx_SET (VOIDmode,
37566 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37567 gen_rtx_fmt_ee (comparison, QImode,
37571 return SUBREG_REG (target);
/* Subroutine of ix86_expand_builtin: expand builtins described by D
   whose last argument is an embedded-rounding / SAE immediate.
   EXP is the CALL_EXPR; the result goes to TARGET if suitable.  */
37575 ix86_expand_round_builtin (const struct builtin_description *d,
37576 tree exp, rtx target)
37579 unsigned int i, nargs;
37585 enum insn_code icode = d->icode;
37586 const struct insn_data_d *insn_p = &insn_data[icode];
37587 machine_mode tmode = insn_p->operand[0].mode;
37588 unsigned int nargs_constant = 0;
/* Set when the rounding immediate is NO_ROUND; the embedded-rounding
   UNSPEC is then stripped from the generated pattern at the end.  */
37589 unsigned int redundant_embed_rnd = 0;
/* Derive the argument count and the number of trailing immediate
   (constant) operands from the builtin's function type.  */
37591 switch ((enum ix86_builtin_func_type) d->flag)
37593 case UINT64_FTYPE_V2DF_INT:
37594 case UINT64_FTYPE_V4SF_INT:
37595 case UINT_FTYPE_V2DF_INT:
37596 case UINT_FTYPE_V4SF_INT:
37597 case INT64_FTYPE_V2DF_INT:
37598 case INT64_FTYPE_V4SF_INT:
37599 case INT_FTYPE_V2DF_INT:
37600 case INT_FTYPE_V4SF_INT:
37603 case V4SF_FTYPE_V4SF_UINT_INT:
37604 case V4SF_FTYPE_V4SF_UINT64_INT:
37605 case V2DF_FTYPE_V2DF_UINT64_INT:
37606 case V4SF_FTYPE_V4SF_INT_INT:
37607 case V4SF_FTYPE_V4SF_INT64_INT:
37608 case V2DF_FTYPE_V2DF_INT64_INT:
37609 case V4SF_FTYPE_V4SF_V4SF_INT:
37610 case V2DF_FTYPE_V2DF_V2DF_INT:
37611 case V4SF_FTYPE_V4SF_V2DF_INT:
37612 case V2DF_FTYPE_V2DF_V4SF_INT:
37615 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37616 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37617 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37618 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37619 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37620 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37621 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37622 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37623 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37624 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37625 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37626 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37627 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37628 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37631 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37632 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37633 nargs_constant = 2;
/* comi-style scalar comparisons are handled by a dedicated expander.  */
37636 case INT_FTYPE_V4SF_V4SF_INT_INT:
37637 case INT_FTYPE_V2DF_V2DF_INT_INT:
37638 return ix86_expand_sse_comi_round (d, exp, target);
37639 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
37640 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37641 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37642 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
37643 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
37644 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
37647 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
37648 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
37649 nargs_constant = 4;
37652 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
37653 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
37654 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
37655 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
37656 nargs_constant = 3;
37659 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
37660 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
37661 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
37662 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
37664 nargs_constant = 4;
37666 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
37667 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
37668 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
37669 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
37671 nargs_constant = 3;
37674 gcc_unreachable ();
37676 gcc_assert (nargs <= ARRAY_SIZE (args));
/* Reuse TARGET only if it already has the output mode and satisfies
   the insn's operand-0 predicate.  */
37680 || GET_MODE (target) != tmode
37681 || !insn_p->operand[0].predicate (target, tmode))
37682 target = gen_reg_rtx (tmode);
/* Expand each call argument and legitimize it for insn operand i + 1.  */
37684 for (i = 0; i < nargs; i++)
37686 tree arg = CALL_EXPR_ARG (exp, i);
37687 rtx op = expand_normal (arg);
37688 machine_mode mode = insn_p->operand[i + 1].mode;
37689 bool match = insn_p->operand[i + 1].predicate (op, mode);
/* First constant operand: an immediate whose required width depends
   on the specific instruction code.  */
37691 if (i == nargs - nargs_constant)
37697 case CODE_FOR_avx512f_getmantv8df_mask_round:
37698 case CODE_FOR_avx512f_getmantv16sf_mask_round:
37699 case CODE_FOR_avx512f_vgetmantv2df_round:
37700 case CODE_FOR_avx512f_vgetmantv4sf_round:
37701 error ("the immediate argument must be a 4-bit immediate");
37703 case CODE_FOR_avx512f_cmpv8df3_mask_round:
37704 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
37705 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
37706 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
37707 error ("the immediate argument must be a 5-bit immediate");
37710 error ("the immediate argument must be an 8-bit immediate");
/* The last operand is always the rounding-mode immediate.  */
37715 else if (i == nargs-1)
37717 if (!insn_p->operand[nargs].predicate (op, SImode))
37719 error ("incorrect rounding operand");
37723 /* If there is no rounding use normal version of the pattern. */
37724 if (INTVAL (op) == NO_ROUND)
37725 redundant_embed_rnd = 1;
37729 if (VECTOR_MODE_P (mode))
37730 op = safe_vector_operand (op, mode);
37732 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37734 if (optimize || !match)
37735 op = copy_to_mode_reg (mode, op);
/* Mode mismatch: copy into a pseudo and take a subreg in MODE.  */
37739 op = copy_to_reg (op);
37740 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37745 args[i].mode = mode;
/* Emit the generator with the arity matching NARGS.  */
37751 pat = GEN_FCN (icode) (target, args[0].op);
37754 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37757 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37761 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37762 args[2].op, args[3].op);
37765 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37766 args[2].op, args[3].op, args[4].op);
37768 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37769 args[2].op, args[3].op, args[4].op,
37773 gcc_unreachable ();
/* Strip the embedded-rounding UNSPEC when rounding was NO_ROUND.  */
37779 if (redundant_embed_rnd)
37780 pat = ix86_erase_embedded_rounding (pat);
37786 /* Subroutine of ix86_expand_builtin to take care of special insns
37787 with variable number of operands. */
/* Expands builtin D for call EXP.  Covers loads, stores (including
   non-temporal and masked variants, which need explicit MEM_ALIGN
   fix-ups) and no-result insns.  Returns TARGET for loads, 0 for
   stores.  */
37790 ix86_expand_special_args_builtin (const struct builtin_description *d,
37791 tree exp, rtx target)
37795 unsigned int i, nargs, arg_adjust, memory;
/* True for insns that require their memory operand to be naturally
   aligned (movnt*, masked AVX-512 load/store).  */
37796 bool aligned_mem = false;
37802 enum insn_code icode = d->icode;
37803 bool last_arg_constant = false;
37804 const struct insn_data_d *insn_p = &insn_data[icode];
37805 machine_mode tmode = insn_p->operand[0].mode;
/* Whether the builtin is a load (produces TARGET) or a store.  */
37806 enum { load, store } klass;
/* Classify the builtin by its function type: set klass, nargs,
   which operand (if any) is memory, and alignment requirements.  */
37808 switch ((enum ix86_builtin_func_type) d->flag)
37810 case VOID_FTYPE_VOID:
37811 emit_insn (GEN_FCN (icode) (target));
37813 case VOID_FTYPE_UINT64:
37814 case VOID_FTYPE_UNSIGNED:
37820 case INT_FTYPE_VOID:
37821 case USHORT_FTYPE_VOID:
37822 case UINT64_FTYPE_VOID:
37823 case UNSIGNED_FTYPE_VOID:
/* One-argument loads through a pointer.  */
37828 case UINT64_FTYPE_PUNSIGNED:
37829 case V2DI_FTYPE_PV2DI:
37830 case V4DI_FTYPE_PV4DI:
37831 case V32QI_FTYPE_PCCHAR:
37832 case V16QI_FTYPE_PCCHAR:
37833 case V8SF_FTYPE_PCV4SF:
37834 case V8SF_FTYPE_PCFLOAT:
37835 case V4SF_FTYPE_PCFLOAT:
37836 case V4DF_FTYPE_PCV2DF:
37837 case V4DF_FTYPE_PCDOUBLE:
37838 case V2DF_FTYPE_PCDOUBLE:
37839 case VOID_FTYPE_PVOID:
37840 case V16SI_FTYPE_PV4SI:
37841 case V16SF_FTYPE_PV4SF:
37842 case V8DI_FTYPE_PV4DI:
37843 case V8DI_FTYPE_PV8DI:
37844 case V8DF_FTYPE_PV4DF:
/* Non-temporal loads demand aligned memory.  */
37850 case CODE_FOR_sse4_1_movntdqa:
37851 case CODE_FOR_avx2_movntdqa:
37852 case CODE_FOR_avx512f_movntdqa:
37853 aligned_mem = true;
/* Two-argument stores: pointer destination, value source.  */
37859 case VOID_FTYPE_PV2SF_V4SF:
37860 case VOID_FTYPE_PV8DI_V8DI:
37861 case VOID_FTYPE_PV4DI_V4DI:
37862 case VOID_FTYPE_PV2DI_V2DI:
37863 case VOID_FTYPE_PCHAR_V32QI:
37864 case VOID_FTYPE_PCHAR_V16QI:
37865 case VOID_FTYPE_PFLOAT_V16SF:
37866 case VOID_FTYPE_PFLOAT_V8SF:
37867 case VOID_FTYPE_PFLOAT_V4SF:
37868 case VOID_FTYPE_PDOUBLE_V8DF:
37869 case VOID_FTYPE_PDOUBLE_V4DF:
37870 case VOID_FTYPE_PDOUBLE_V2DF:
37871 case VOID_FTYPE_PLONGLONG_LONGLONG:
37872 case VOID_FTYPE_PULONGLONG_ULONGLONG:
37873 case VOID_FTYPE_PINT_INT:
37876 /* Reserve memory operand for target. */
37877 memory = ARRAY_SIZE (args);
37880 /* These builtins and instructions require the memory
37881 to be properly aligned. */
37882 case CODE_FOR_avx_movntv4di:
37883 case CODE_FOR_sse2_movntv2di:
37884 case CODE_FOR_avx_movntv8sf:
37885 case CODE_FOR_sse_movntv4sf:
37886 case CODE_FOR_sse4a_vmmovntv4sf:
37887 case CODE_FOR_avx_movntv4df:
37888 case CODE_FOR_sse2_movntv2df:
37889 case CODE_FOR_sse4a_vmmovntv2df:
37890 case CODE_FOR_sse2_movntidi:
37891 case CODE_FOR_sse_movntq:
37892 case CODE_FOR_sse2_movntisi:
37893 case CODE_FOR_avx512f_movntv16sf:
37894 case CODE_FOR_avx512f_movntv8df:
37895 case CODE_FOR_avx512f_movntv8di:
37896 aligned_mem = true;
/* Two-argument loads: vector operand plus memory operand.  */
37902 case V4SF_FTYPE_V4SF_PCV2SF:
37903 case V2DF_FTYPE_V2DF_PCDOUBLE:
/* Masked loads (memory first, mask second).  */
37908 case V8SF_FTYPE_PCV8SF_V8SI:
37909 case V4DF_FTYPE_PCV4DF_V4DI:
37910 case V4SF_FTYPE_PCV4SF_V4SI:
37911 case V2DF_FTYPE_PCV2DF_V2DI:
37912 case V8SI_FTYPE_PCV8SI_V8SI:
37913 case V4DI_FTYPE_PCV4DI_V4DI:
37914 case V4SI_FTYPE_PCV4SI_V4SI:
37915 case V2DI_FTYPE_PCV2DI_V2DI:
/* Masked stores with a mask-register mask.  */
37920 case VOID_FTYPE_PV8DF_V8DF_QI:
37921 case VOID_FTYPE_PV16SF_V16SF_HI:
37922 case VOID_FTYPE_PV8DI_V8DI_QI:
37923 case VOID_FTYPE_PV4DI_V4DI_QI:
37924 case VOID_FTYPE_PV2DI_V2DI_QI:
37925 case VOID_FTYPE_PV16SI_V16SI_HI:
37926 case VOID_FTYPE_PV8SI_V8SI_QI:
37927 case VOID_FTYPE_PV4SI_V4SI_QI:
37930 /* These builtins and instructions require the memory
37931 to be properly aligned. */
37932 case CODE_FOR_avx512f_storev16sf_mask:
37933 case CODE_FOR_avx512f_storev16si_mask:
37934 case CODE_FOR_avx512f_storev8df_mask:
37935 case CODE_FOR_avx512f_storev8di_mask:
37936 case CODE_FOR_avx512vl_storev8sf_mask:
37937 case CODE_FOR_avx512vl_storev8si_mask:
37938 case CODE_FOR_avx512vl_storev4df_mask:
37939 case CODE_FOR_avx512vl_storev4di_mask:
37940 case CODE_FOR_avx512vl_storev4sf_mask:
37941 case CODE_FOR_avx512vl_storev4si_mask:
37942 case CODE_FOR_avx512vl_storev2df_mask:
37943 case CODE_FOR_avx512vl_storev2di_mask:
37944 aligned_mem = true;
/* Three-argument stores (destination, mask/index, value).  */
37950 case VOID_FTYPE_PV8SF_V8SI_V8SF:
37951 case VOID_FTYPE_PV4DF_V4DI_V4DF:
37952 case VOID_FTYPE_PV4SF_V4SI_V4SF:
37953 case VOID_FTYPE_PV2DF_V2DI_V2DF:
37954 case VOID_FTYPE_PV8SI_V8SI_V8SI:
37955 case VOID_FTYPE_PV4DI_V4DI_V4DI:
37956 case VOID_FTYPE_PV4SI_V4SI_V4SI:
37957 case VOID_FTYPE_PV2DI_V2DI_V2DI:
37958 case VOID_FTYPE_PDOUBLE_V2DF_QI:
37959 case VOID_FTYPE_PFLOAT_V4SF_QI:
37960 case VOID_FTYPE_PV8SI_V8DI_QI:
37961 case VOID_FTYPE_PV8HI_V8DI_QI:
37962 case VOID_FTYPE_PV16HI_V16SI_HI:
37963 case VOID_FTYPE_PV16QI_V8DI_QI:
37964 case VOID_FTYPE_PV16QI_V16SI_HI:
37965 case VOID_FTYPE_PV4SI_V4DI_QI:
37966 case VOID_FTYPE_PV4SI_V2DI_QI:
37967 case VOID_FTYPE_PV8HI_V4DI_QI:
37968 case VOID_FTYPE_PV8HI_V2DI_QI:
37969 case VOID_FTYPE_PV8HI_V8SI_QI:
37970 case VOID_FTYPE_PV8HI_V4SI_QI:
37971 case VOID_FTYPE_PV16QI_V4DI_QI:
37972 case VOID_FTYPE_PV16QI_V2DI_QI:
37973 case VOID_FTYPE_PV16QI_V8SI_QI:
37974 case VOID_FTYPE_PV16QI_V4SI_QI:
37975 case VOID_FTYPE_PV8HI_V8HI_QI:
37976 case VOID_FTYPE_PV16HI_V16HI_HI:
37977 case VOID_FTYPE_PV32HI_V32HI_SI:
37978 case VOID_FTYPE_PV16QI_V16QI_HI:
37979 case VOID_FTYPE_PV32QI_V32QI_SI:
37980 case VOID_FTYPE_PV64QI_V64QI_DI:
37981 case VOID_FTYPE_PV4DF_V4DF_QI:
37982 case VOID_FTYPE_PV2DF_V2DF_QI:
37983 case VOID_FTYPE_PV8SF_V8SF_QI:
37984 case VOID_FTYPE_PV4SF_V4SF_QI:
37987 /* Reserve memory operand for target. */
37988 memory = ARRAY_SIZE (args);
/* Masked loads with pass-through vector and mask.  */
37990 case V4SF_FTYPE_PCV4SF_V4SF_QI:
37991 case V8SF_FTYPE_PCV8SF_V8SF_QI:
37992 case V16SF_FTYPE_PCV16SF_V16SF_HI:
37993 case V4SI_FTYPE_PCV4SI_V4SI_QI:
37994 case V8SI_FTYPE_PCV8SI_V8SI_QI:
37995 case V16SI_FTYPE_PCV16SI_V16SI_HI:
37996 case V2DF_FTYPE_PCV2DF_V2DF_QI:
37997 case V4DF_FTYPE_PCV4DF_V4DF_QI:
37998 case V8DF_FTYPE_PCV8DF_V8DF_QI:
37999 case V2DI_FTYPE_PCV2DI_V2DI_QI:
38000 case V4DI_FTYPE_PCV4DI_V4DI_QI:
38001 case V8DI_FTYPE_PCV8DI_V8DI_QI:
38002 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
38003 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
38004 case V8HI_FTYPE_PCV8HI_V8HI_QI:
38005 case V16HI_FTYPE_PCV16HI_V16HI_HI:
38006 case V32HI_FTYPE_PCV32HI_V32HI_SI:
38007 case V16QI_FTYPE_PCV16QI_V16QI_HI:
38008 case V32QI_FTYPE_PCV32QI_V32QI_SI:
38009 case V64QI_FTYPE_PCV64QI_V64QI_DI:
38015 /* These builtins and instructions require the memory
38016 to be properly aligned. */
38017 case CODE_FOR_avx512f_loadv16sf_mask:
38018 case CODE_FOR_avx512f_loadv16si_mask:
38019 case CODE_FOR_avx512f_loadv8df_mask:
38020 case CODE_FOR_avx512f_loadv8di_mask:
38021 case CODE_FOR_avx512vl_loadv8sf_mask:
38022 case CODE_FOR_avx512vl_loadv8si_mask:
38023 case CODE_FOR_avx512vl_loadv4df_mask:
38024 case CODE_FOR_avx512vl_loadv4di_mask:
38025 case CODE_FOR_avx512vl_loadv4sf_mask:
38026 case CODE_FOR_avx512vl_loadv4si_mask:
38027 case CODE_FOR_avx512vl_loadv2df_mask:
38028 case CODE_FOR_avx512vl_loadv2di_mask:
38029 case CODE_FOR_avx512bw_loadv64qi_mask:
38030 case CODE_FOR_avx512vl_loadv32qi_mask:
38031 case CODE_FOR_avx512vl_loadv16qi_mask:
38032 case CODE_FOR_avx512bw_loadv32hi_mask:
38033 case CODE_FOR_avx512vl_loadv16hi_mask:
38034 case CODE_FOR_avx512vl_loadv8hi_mask:
38035 aligned_mem = true;
/* Three-operand forms whose last argument is an immediate.  */
38041 case VOID_FTYPE_UINT_UINT_UINT:
38042 case VOID_FTYPE_UINT64_UINT_UINT:
38043 case UCHAR_FTYPE_UINT_UINT_UINT:
38044 case UCHAR_FTYPE_UINT64_UINT_UINT:
38047 memory = ARRAY_SIZE (args);
38048 last_arg_constant = true;
38051 gcc_unreachable ();
38054 gcc_assert (nargs <= ARRAY_SIZE (args));
/* For stores, the destination MEM (built from argument 0) becomes
   the insn "target" operand.  */
38056 if (klass == store)
38058 arg = CALL_EXPR_ARG (exp, 0);
38059 op = expand_normal (arg);
38060 gcc_assert (target == 0);
38063 op = ix86_zero_extend_to_Pmode (op);
38064 target = gen_rtx_MEM (tmode, op);
38065 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
38066 on it. Try to improve it using get_pointer_alignment,
38067 and if the special builtin is one that requires strict
38068 mode alignment, also from it's GET_MODE_ALIGNMENT.
38069 Failure to do so could lead to ix86_legitimate_combined_insn
38070 rejecting all changes to such insns. */
38071 unsigned int align = get_pointer_alignment (arg);
38072 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
38073 align = GET_MODE_ALIGNMENT (tmode);
38074 if (MEM_ALIGN (target) < align)
38075 set_mem_align (target, align);
38078 target = force_reg (tmode, op);
38086 || !register_operand (target, tmode)
38087 || GET_MODE (target) != tmode)
38088 target = gen_reg_rtx (tmode);
/* Expand and legitimize the remaining arguments.  */
38091 for (i = 0; i < nargs; i++)
38093 machine_mode mode = insn_p->operand[i + 1].mode;
38096 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
38097 op = expand_normal (arg);
38098 match = insn_p->operand[i + 1].predicate (op, mode);
/* Trailing immediate operand: diagnose a non-constant argument.  */
38100 if (last_arg_constant && (i + 1) == nargs)
38104 if (icode == CODE_FOR_lwp_lwpvalsi3
38105 || icode == CODE_FOR_lwp_lwpinssi3
38106 || icode == CODE_FOR_lwp_lwpvaldi3
38107 || icode == CODE_FOR_lwp_lwpinsdi3)
38108 error ("the last argument must be a 32-bit immediate");
38110 error ("the last argument must be an 8-bit immediate");
38118 /* This must be the memory operand. */
38119 op = ix86_zero_extend_to_Pmode (op);
38120 op = gen_rtx_MEM (mode, op);
38121 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
38122 on it. Try to improve it using get_pointer_alignment,
38123 and if the special builtin is one that requires strict
38124 mode alignment, also from it's GET_MODE_ALIGNMENT.
38125 Failure to do so could lead to ix86_legitimate_combined_insn
38126 rejecting all changes to such insns. */
38127 unsigned int align = get_pointer_alignment (arg);
38128 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
38129 align = GET_MODE_ALIGNMENT (mode);
38130 if (MEM_ALIGN (op) < align)
38131 set_mem_align (op, align);
38135 /* This must be register. */
38136 if (VECTOR_MODE_P (mode))
38137 op = safe_vector_operand (op, mode);
38139 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38140 op = copy_to_mode_reg (mode, op);
/* Mode mismatch: copy into a pseudo and take a subreg in MODE.  */
38143 op = copy_to_reg (op);
38144 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38150 args[i].mode = mode;
/* Emit the generator with the arity matching NARGS.  */
38156 pat = GEN_FCN (icode) (target);
38159 pat = GEN_FCN (icode) (target, args[0].op);
38162 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38165 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38168 gcc_unreachable ();
38174 return klass == store ? 0 : target;
38177 /* Return the integer constant in ARG. Constrain it to be in the range
38178 of the subparts of VEC_TYPE; issue an error if not. */
38181 get_element_number (tree vec_type, tree arg)
/* MAX is the highest valid lane index for VEC_TYPE.  */
38183 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* Reject non-constant or out-of-range selectors with a diagnostic.  */
38185 if (!tree_fits_uhwi_p (arg)
38186 || (elt = tree_to_uhwi (arg), elt > max))
38188 error ("selector must be an integer constant in the range 0..%wi", max);
38195 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38196 ix86_expand_vector_init. We DO have language-level syntax for this, in
38197 the form of (type){ init-list }. Except that since we can't place emms
38198 instructions from inside the compiler, we can't allow the use of MMX
38199 registers unless the user explicitly asks for it. So we do *not* define
38200 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
38201 we have builtins invoked by mmintrin.h that gives us license to emit
38202 these sorts of instructions. */
/* TYPE is the vector type being built, EXP the CALL_EXPR supplying one
   scalar argument per element, TARGET an optional destination.  */
38205 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
38207 machine_mode tmode = TYPE_MODE (type);
38208 machine_mode inner_mode = GET_MODE_INNER (tmode);
38209 int i, n_elt = GET_MODE_NUNITS (tmode);
38210 rtvec v = rtvec_alloc (n_elt);
38212 gcc_assert (VECTOR_MODE_P (tmode));
38213 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand each element and narrow it to the vector's element mode.  */
38215 for (i = 0; i < n_elt; ++i)
38217 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38218 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38221 if (!target || !register_operand (target, tmode))
38222 target = gen_reg_rtx (tmode);
38224 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38228 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38229 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
38230 had a language-level syntax for referencing vector elements. */
/* EXP: (vector, constant selector).  Returns the extracted element.  */
38233 ix86_expand_vec_ext_builtin (tree exp, rtx target)
38235 machine_mode tmode, mode0;
38240 arg0 = CALL_EXPR_ARG (exp, 0);
38241 arg1 = CALL_EXPR_ARG (exp, 1);
38243 op0 = expand_normal (arg0);
/* Validates the selector range and reports an error if bad.  */
38244 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* tmode = element mode, mode0 = vector mode.  */
38246 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38247 mode0 = TYPE_MODE (TREE_TYPE (arg0));
38248 gcc_assert (VECTOR_MODE_P (mode0));
38250 op0 = force_reg (mode0, op0);
38252 if (optimize || !target || !register_operand (target, tmode))
38253 target = gen_reg_rtx (tmode);
38255 ix86_expand_vector_extract (true, target, op0, elt);
38260 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38261 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
38262 a language-level syntax for referencing vector elements. */
/* EXP: (vector, new element value, constant selector).  Returns a fresh
   copy of the vector with the selected element replaced.  */
38265 ix86_expand_vec_set_builtin (tree exp)
38267 machine_mode tmode, mode1;
38268 tree arg0, arg1, arg2;
38270 rtx op0, op1, target;
38272 arg0 = CALL_EXPR_ARG (exp, 0);
38273 arg1 = CALL_EXPR_ARG (exp, 1);
38274 arg2 = CALL_EXPR_ARG (exp, 2);
/* tmode = vector mode, mode1 = element mode.  */
38276 tmode = TYPE_MODE (TREE_TYPE (arg0));
38277 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38278 gcc_assert (VECTOR_MODE_P (tmode));
38280 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38281 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
/* Validates the selector range and reports an error if bad.  */
38282 elt = get_element_number (TREE_TYPE (arg0), arg2);
38284 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38285 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38287 op0 = force_reg (tmode, op0);
38288 op1 = force_reg (mode1, op1);
38290 /* OP0 is the source of these builtin functions and shouldn't be
38291 modified. Create a copy, use it and return it as target. */
38292 target = gen_reg_rtx (tmode);
38293 emit_move_insn (target, op0);
38294 ix86_expand_vector_set (true, target, op1, elt);
38299 /* Emit conditional move of SRC to DST with condition
/* CODE applied to OP1 and OP2.  Uses a real cmove when available,
   otherwise falls back to a compare-and-branch around the move.  */
38302 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
/* cmove path: DST = (CODE op1,op2) ? SRC : DST.  */
38308 t = ix86_expand_compare (code, op1, op2);
38309 emit_insn (gen_rtx_SET (VOIDmode, dst,
38310 gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
/* Fallback: branch over the move when the reversed condition holds.  */
38315 rtx nomove = gen_label_rtx ();
38316 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38317 const0_rtx, GET_MODE (op1), 1, nomove);
38318 emit_move_insn (dst, src);
38319 emit_label (nomove);
38323 /* Choose max of DST and SRC and put it to DST. */
/* Unsigned maximum: copies SRC into DST when DST < SRC (LTU).  */
38325 ix86_emit_move_max (rtx dst, rtx src)
38327 ix86_emit_cmove (dst, src, LTU, dst, src);
38330 /* Expand an expression EXP that calls a built-in function,
38331 with result going to TARGET if that's convenient
38332 (and in mode MODE if that's convenient).
38333 SUBTARGET may be used as the target for computing one of EXP's operands.
38334 IGNORE is nonzero if the value is to be ignored. */
38337 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38338 machine_mode mode, int ignore)
38340 const struct builtin_description *d;
38342 enum insn_code icode;
38343 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38344 tree arg0, arg1, arg2, arg3, arg4;
38345 rtx op0, op1, op2, op3, op4, pat, insn;
38346 machine_mode mode0, mode1, mode2, mode3, mode4;
38347 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38349 /* For CPU builtins that can be folded, fold first and expand the fold. */
38352 case IX86_BUILTIN_CPU_INIT:
38354 /* Make it call __cpu_indicator_init in libgcc. */
38355 tree call_expr, fndecl, type;
38356 type = build_function_type_list (integer_type_node, NULL_TREE);
38357 fndecl = build_fn_decl ("__cpu_indicator_init", type);
38358 call_expr = build_call_expr (fndecl, 0);
38359 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38361 case IX86_BUILTIN_CPU_IS:
38362 case IX86_BUILTIN_CPU_SUPPORTS:
38364 tree arg0 = CALL_EXPR_ARG (exp, 0);
38365 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38366 gcc_assert (fold_expr != NULL_TREE);
38367 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
38371 /* Determine whether the builtin function is available under the current ISA.
38372 Originally the builtin was not created if it wasn't applicable to the
38373 current ISA based on the command line switches. With function specific
38374 options, we need to check in the context of the function making the call
38375 whether it is supported. */
38376 if (ix86_builtins_isa[fcode].isa
38377 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38379 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38380 NULL, (enum fpmath_unit) 0, false);
38383 error ("%qE needs unknown isa option", fndecl);
38386 gcc_assert (opts != NULL);
38387 error ("%qE needs isa option %s", fndecl, opts);
38395 case IX86_BUILTIN_BNDMK:
38397 || GET_MODE (target) != BNDmode
38398 || !register_operand (target, BNDmode))
38399 target = gen_reg_rtx (BNDmode);
38401 arg0 = CALL_EXPR_ARG (exp, 0);
38402 arg1 = CALL_EXPR_ARG (exp, 1);
38404 op0 = expand_normal (arg0);
38405 op1 = expand_normal (arg1);
38407 if (!register_operand (op0, Pmode))
38408 op0 = ix86_zero_extend_to_Pmode (op0);
38409 if (!register_operand (op1, Pmode))
38410 op1 = ix86_zero_extend_to_Pmode (op1);
38412 /* Builtin arg1 is size of block but instruction op1 should
38414 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38415 NULL_RTX, 1, OPTAB_DIRECT);
38417 emit_insn (BNDmode == BND64mode
38418 ? gen_bnd64_mk (target, op0, op1)
38419 : gen_bnd32_mk (target, op0, op1));
38422 case IX86_BUILTIN_BNDSTX:
38423 arg0 = CALL_EXPR_ARG (exp, 0);
38424 arg1 = CALL_EXPR_ARG (exp, 1);
38425 arg2 = CALL_EXPR_ARG (exp, 2);
38427 op0 = expand_normal (arg0);
38428 op1 = expand_normal (arg1);
38429 op2 = expand_normal (arg2);
38431 if (!register_operand (op0, Pmode))
38432 op0 = ix86_zero_extend_to_Pmode (op0);
38433 if (!register_operand (op1, BNDmode))
38434 op1 = copy_to_mode_reg (BNDmode, op1);
38435 if (!register_operand (op2, Pmode))
38436 op2 = ix86_zero_extend_to_Pmode (op2);
38438 emit_insn (BNDmode == BND64mode
38439 ? gen_bnd64_stx (op2, op0, op1)
38440 : gen_bnd32_stx (op2, op0, op1));
38443 case IX86_BUILTIN_BNDLDX:
38445 || GET_MODE (target) != BNDmode
38446 || !register_operand (target, BNDmode))
38447 target = gen_reg_rtx (BNDmode);
38449 arg0 = CALL_EXPR_ARG (exp, 0);
38450 arg1 = CALL_EXPR_ARG (exp, 1);
38452 op0 = expand_normal (arg0);
38453 op1 = expand_normal (arg1);
38455 if (!register_operand (op0, Pmode))
38456 op0 = ix86_zero_extend_to_Pmode (op0);
38457 if (!register_operand (op1, Pmode))
38458 op1 = ix86_zero_extend_to_Pmode (op1);
38460 emit_insn (BNDmode == BND64mode
38461 ? gen_bnd64_ldx (target, op0, op1)
38462 : gen_bnd32_ldx (target, op0, op1));
38465 case IX86_BUILTIN_BNDCL:
38466 arg0 = CALL_EXPR_ARG (exp, 0);
38467 arg1 = CALL_EXPR_ARG (exp, 1);
38469 op0 = expand_normal (arg0);
38470 op1 = expand_normal (arg1);
38472 if (!register_operand (op0, Pmode))
38473 op0 = ix86_zero_extend_to_Pmode (op0);
38474 if (!register_operand (op1, BNDmode))
38475 op1 = copy_to_mode_reg (BNDmode, op1);
38477 emit_insn (BNDmode == BND64mode
38478 ? gen_bnd64_cl (op1, op0)
38479 : gen_bnd32_cl (op1, op0));
38482 case IX86_BUILTIN_BNDCU:
38483 arg0 = CALL_EXPR_ARG (exp, 0);
38484 arg1 = CALL_EXPR_ARG (exp, 1);
38486 op0 = expand_normal (arg0);
38487 op1 = expand_normal (arg1);
38489 if (!register_operand (op0, Pmode))
38490 op0 = ix86_zero_extend_to_Pmode (op0);
38491 if (!register_operand (op1, BNDmode))
38492 op1 = copy_to_mode_reg (BNDmode, op1);
38494 emit_insn (BNDmode == BND64mode
38495 ? gen_bnd64_cu (op1, op0)
38496 : gen_bnd32_cu (op1, op0));
38499 case IX86_BUILTIN_BNDRET:
38500 arg0 = CALL_EXPR_ARG (exp, 0);
38501 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38502 target = chkp_get_rtl_bounds (arg0);
38504 /* If no bounds were specified for returned value,
38505 then use INIT bounds. It usually happens when
38506 some built-in function is expanded. */
38509 rtx t1 = gen_reg_rtx (Pmode);
38510 rtx t2 = gen_reg_rtx (Pmode);
38511 target = gen_reg_rtx (BNDmode);
38512 emit_move_insn (t1, const0_rtx);
38513 emit_move_insn (t2, constm1_rtx);
38514 emit_insn (BNDmode == BND64mode
38515 ? gen_bnd64_mk (target, t1, t2)
38516 : gen_bnd32_mk (target, t1, t2));
38519 gcc_assert (target && REG_P (target));
38522 case IX86_BUILTIN_BNDNARROW:
38524 rtx m1, m1h1, m1h2, lb, ub, t1;
38526 /* Return value and lb. */
38527 arg0 = CALL_EXPR_ARG (exp, 0);
38529 arg1 = CALL_EXPR_ARG (exp, 1);
38531 arg2 = CALL_EXPR_ARG (exp, 2);
38533 lb = expand_normal (arg0);
38534 op1 = expand_normal (arg1);
38535 op2 = expand_normal (arg2);
38537 /* Size was passed but we need to use (size - 1) as for bndmk. */
38538 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38539 NULL_RTX, 1, OPTAB_DIRECT);
38541 /* Add LB to size and inverse to get UB. */
38542 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38543 op2, 1, OPTAB_DIRECT);
38544 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
38546 if (!register_operand (lb, Pmode))
38547 lb = ix86_zero_extend_to_Pmode (lb);
38548 if (!register_operand (ub, Pmode))
38549 ub = ix86_zero_extend_to_Pmode (ub);
38551 /* We need to move bounds to memory before any computations. */
38556 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38557 emit_move_insn (m1, op1);
38560 /* Generate mem expression to be used for access to LB and UB. */
38561 m1h1 = adjust_address (m1, Pmode, 0);
38562 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38564 t1 = gen_reg_rtx (Pmode);
38567 emit_move_insn (t1, m1h1);
38568 ix86_emit_move_max (t1, lb);
38569 emit_move_insn (m1h1, t1);
38571 /* Compute UB. UB is stored in 1's complement form. Therefore
38572 we also use max here. */
38573 emit_move_insn (t1, m1h2);
38574 ix86_emit_move_max (t1, ub);
38575 emit_move_insn (m1h2, t1);
38577 op2 = gen_reg_rtx (BNDmode);
38578 emit_move_insn (op2, m1);
38580 return chkp_join_splitted_slot (lb, op2);
38583 case IX86_BUILTIN_BNDINT:
38585 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38588 || GET_MODE (target) != BNDmode
38589 || !register_operand (target, BNDmode))
38590 target = gen_reg_rtx (BNDmode);
38592 arg0 = CALL_EXPR_ARG (exp, 0);
38593 arg1 = CALL_EXPR_ARG (exp, 1);
38595 op0 = expand_normal (arg0);
38596 op1 = expand_normal (arg1);
38598 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38599 rh1 = adjust_address (res, Pmode, 0);
38600 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38602 /* Put first bounds to temporaries. */
38603 lb1 = gen_reg_rtx (Pmode);
38604 ub1 = gen_reg_rtx (Pmode);
38607 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38608 emit_move_insn (ub1, adjust_address (op0, Pmode,
38609 GET_MODE_SIZE (Pmode)));
38613 emit_move_insn (res, op0);
38614 emit_move_insn (lb1, rh1);
38615 emit_move_insn (ub1, rh2);
38618 /* Put second bounds to temporaries. */
38619 lb2 = gen_reg_rtx (Pmode);
38620 ub2 = gen_reg_rtx (Pmode);
38623 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38624 emit_move_insn (ub2, adjust_address (op1, Pmode,
38625 GET_MODE_SIZE (Pmode)));
38629 emit_move_insn (res, op1);
38630 emit_move_insn (lb2, rh1);
38631 emit_move_insn (ub2, rh2);
38635 ix86_emit_move_max (lb1, lb2);
38636 emit_move_insn (rh1, lb1);
38638 /* Compute UB. UB is stored in 1's complement form. Therefore
38639 we also use max here. */
38640 ix86_emit_move_max (ub1, ub2);
38641 emit_move_insn (rh2, ub1);
38643 emit_move_insn (target, res);
38648 case IX86_BUILTIN_SIZEOF:
38654 || GET_MODE (target) != Pmode
38655 || !register_operand (target, Pmode))
38656 target = gen_reg_rtx (Pmode);
38658 arg0 = CALL_EXPR_ARG (exp, 0);
38659 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
38661 name = DECL_ASSEMBLER_NAME (arg0);
38662 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
38664 emit_insn (Pmode == SImode
38665 ? gen_move_size_reloc_si (target, symbol)
38666 : gen_move_size_reloc_di (target, symbol));
38671 case IX86_BUILTIN_BNDLOWER:
38676 || GET_MODE (target) != Pmode
38677 || !register_operand (target, Pmode))
38678 target = gen_reg_rtx (Pmode);
38680 arg0 = CALL_EXPR_ARG (exp, 0);
38681 op0 = expand_normal (arg0);
38683 /* We need to move bounds to memory first. */
38688 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38689 emit_move_insn (mem, op0);
38692 /* Generate mem expression to access LB and load it. */
38693 hmem = adjust_address (mem, Pmode, 0);
38694 emit_move_insn (target, hmem);
38699 case IX86_BUILTIN_BNDUPPER:
38701 rtx mem, hmem, res;
38704 || GET_MODE (target) != Pmode
38705 || !register_operand (target, Pmode))
38706 target = gen_reg_rtx (Pmode);
38708 arg0 = CALL_EXPR_ARG (exp, 0);
38709 op0 = expand_normal (arg0);
38711 /* We need to move bounds to memory first. */
38716 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38717 emit_move_insn (mem, op0);
38720 /* Generate mem expression to access UB. */
38721 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
38723 /* We need to inverse all bits of UB. */
38724 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
38727 emit_move_insn (target, res);
38732 case IX86_BUILTIN_MASKMOVQ:
38733 case IX86_BUILTIN_MASKMOVDQU:
38734 icode = (fcode == IX86_BUILTIN_MASKMOVQ
38735 ? CODE_FOR_mmx_maskmovq
38736 : CODE_FOR_sse2_maskmovdqu);
38737 /* Note the arg order is different from the operand order. */
38738 arg1 = CALL_EXPR_ARG (exp, 0);
38739 arg2 = CALL_EXPR_ARG (exp, 1);
38740 arg0 = CALL_EXPR_ARG (exp, 2);
38741 op0 = expand_normal (arg0);
38742 op1 = expand_normal (arg1);
38743 op2 = expand_normal (arg2);
38744 mode0 = insn_data[icode].operand[0].mode;
38745 mode1 = insn_data[icode].operand[1].mode;
38746 mode2 = insn_data[icode].operand[2].mode;
38748 op0 = ix86_zero_extend_to_Pmode (op0);
38749 op0 = gen_rtx_MEM (mode1, op0);
38751 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38752 op0 = copy_to_mode_reg (mode0, op0);
38753 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38754 op1 = copy_to_mode_reg (mode1, op1);
38755 if (!insn_data[icode].operand[2].predicate (op2, mode2))
38756 op2 = copy_to_mode_reg (mode2, op2);
38757 pat = GEN_FCN (icode) (op0, op1, op2);
38763 case IX86_BUILTIN_LDMXCSR:
38764 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
38765 target = assign_386_stack_local (SImode, SLOT_TEMP);
38766 emit_move_insn (target, op0);
38767 emit_insn (gen_sse_ldmxcsr (target));
38770 case IX86_BUILTIN_STMXCSR:
38771 target = assign_386_stack_local (SImode, SLOT_TEMP);
38772 emit_insn (gen_sse_stmxcsr (target));
38773 return copy_to_mode_reg (SImode, target);
38775 case IX86_BUILTIN_CLFLUSH:
38776 arg0 = CALL_EXPR_ARG (exp, 0);
38777 op0 = expand_normal (arg0);
38778 icode = CODE_FOR_sse2_clflush;
38779 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38780 op0 = ix86_zero_extend_to_Pmode (op0);
38782 emit_insn (gen_sse2_clflush (op0));
38785 case IX86_BUILTIN_CLWB:
38786 arg0 = CALL_EXPR_ARG (exp, 0);
38787 op0 = expand_normal (arg0);
38788 icode = CODE_FOR_clwb;
38789 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38790 op0 = ix86_zero_extend_to_Pmode (op0);
38792 emit_insn (gen_clwb (op0));
38795 case IX86_BUILTIN_CLFLUSHOPT:
38796 arg0 = CALL_EXPR_ARG (exp, 0);
38797 op0 = expand_normal (arg0);
38798 icode = CODE_FOR_clflushopt;
38799 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38800 op0 = ix86_zero_extend_to_Pmode (op0);
38802 emit_insn (gen_clflushopt (op0));
38805 case IX86_BUILTIN_MONITOR:
38806 arg0 = CALL_EXPR_ARG (exp, 0);
38807 arg1 = CALL_EXPR_ARG (exp, 1);
38808 arg2 = CALL_EXPR_ARG (exp, 2);
38809 op0 = expand_normal (arg0);
38810 op1 = expand_normal (arg1);
38811 op2 = expand_normal (arg2);
38813 op0 = ix86_zero_extend_to_Pmode (op0);
38815 op1 = copy_to_mode_reg (SImode, op1);
38817 op2 = copy_to_mode_reg (SImode, op2);
38818 emit_insn (ix86_gen_monitor (op0, op1, op2));
38821 case IX86_BUILTIN_MWAIT:
38822 arg0 = CALL_EXPR_ARG (exp, 0);
38823 arg1 = CALL_EXPR_ARG (exp, 1);
38824 op0 = expand_normal (arg0);
38825 op1 = expand_normal (arg1);
38827 op0 = copy_to_mode_reg (SImode, op0);
38829 op1 = copy_to_mode_reg (SImode, op1);
38830 emit_insn (gen_sse3_mwait (op0, op1));
38833 case IX86_BUILTIN_VEC_INIT_V2SI:
38834 case IX86_BUILTIN_VEC_INIT_V4HI:
38835 case IX86_BUILTIN_VEC_INIT_V8QI:
38836 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
38838 case IX86_BUILTIN_VEC_EXT_V2DF:
38839 case IX86_BUILTIN_VEC_EXT_V2DI:
38840 case IX86_BUILTIN_VEC_EXT_V4SF:
38841 case IX86_BUILTIN_VEC_EXT_V4SI:
38842 case IX86_BUILTIN_VEC_EXT_V8HI:
38843 case IX86_BUILTIN_VEC_EXT_V2SI:
38844 case IX86_BUILTIN_VEC_EXT_V4HI:
38845 case IX86_BUILTIN_VEC_EXT_V16QI:
38846 return ix86_expand_vec_ext_builtin (exp, target);
38848 case IX86_BUILTIN_VEC_SET_V2DI:
38849 case IX86_BUILTIN_VEC_SET_V4SF:
38850 case IX86_BUILTIN_VEC_SET_V4SI:
38851 case IX86_BUILTIN_VEC_SET_V8HI:
38852 case IX86_BUILTIN_VEC_SET_V4HI:
38853 case IX86_BUILTIN_VEC_SET_V16QI:
38854 return ix86_expand_vec_set_builtin (exp);
38856 case IX86_BUILTIN_INFQ:
38857 case IX86_BUILTIN_HUGE_VALQ:
38859 REAL_VALUE_TYPE inf;
38863 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
38865 tmp = validize_mem (force_const_mem (mode, tmp));
38868 target = gen_reg_rtx (mode);
38870 emit_move_insn (target, tmp);
38874 case IX86_BUILTIN_RDPMC:
38875 case IX86_BUILTIN_RDTSC:
38876 case IX86_BUILTIN_RDTSCP:
38878 op0 = gen_reg_rtx (DImode);
38879 op1 = gen_reg_rtx (DImode);
38881 if (fcode == IX86_BUILTIN_RDPMC)
38883 arg0 = CALL_EXPR_ARG (exp, 0);
38884 op2 = expand_normal (arg0);
38885 if (!register_operand (op2, SImode))
38886 op2 = copy_to_mode_reg (SImode, op2);
38888 insn = (TARGET_64BIT
38889 ? gen_rdpmc_rex64 (op0, op1, op2)
38890 : gen_rdpmc (op0, op2));
38893 else if (fcode == IX86_BUILTIN_RDTSC)
38895 insn = (TARGET_64BIT
38896 ? gen_rdtsc_rex64 (op0, op1)
38897 : gen_rdtsc (op0));
38902 op2 = gen_reg_rtx (SImode);
38904 insn = (TARGET_64BIT
38905 ? gen_rdtscp_rex64 (op0, op1, op2)
38906 : gen_rdtscp (op0, op2));
38909 arg0 = CALL_EXPR_ARG (exp, 0);
38910 op4 = expand_normal (arg0);
38911 if (!address_operand (op4, VOIDmode))
38913 op4 = convert_memory_address (Pmode, op4);
38914 op4 = copy_addr_to_reg (op4);
38916 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
38921 /* mode is VOIDmode if __builtin_rd* has been called
38923 if (mode == VOIDmode)
38925 target = gen_reg_rtx (mode);
38930 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
38931 op1, 1, OPTAB_DIRECT);
38932 op0 = expand_simple_binop (DImode, IOR, op0, op1,
38933 op0, 1, OPTAB_DIRECT);
38936 emit_move_insn (target, op0);
38939 case IX86_BUILTIN_FXSAVE:
38940 case IX86_BUILTIN_FXRSTOR:
38941 case IX86_BUILTIN_FXSAVE64:
38942 case IX86_BUILTIN_FXRSTOR64:
38943 case IX86_BUILTIN_FNSTENV:
38944 case IX86_BUILTIN_FLDENV:
38948 case IX86_BUILTIN_FXSAVE:
38949 icode = CODE_FOR_fxsave;
38951 case IX86_BUILTIN_FXRSTOR:
38952 icode = CODE_FOR_fxrstor;
38954 case IX86_BUILTIN_FXSAVE64:
38955 icode = CODE_FOR_fxsave64;
38957 case IX86_BUILTIN_FXRSTOR64:
38958 icode = CODE_FOR_fxrstor64;
38960 case IX86_BUILTIN_FNSTENV:
38961 icode = CODE_FOR_fnstenv;
38963 case IX86_BUILTIN_FLDENV:
38964 icode = CODE_FOR_fldenv;
38967 gcc_unreachable ();
38970 arg0 = CALL_EXPR_ARG (exp, 0);
38971 op0 = expand_normal (arg0);
38973 if (!address_operand (op0, VOIDmode))
38975 op0 = convert_memory_address (Pmode, op0);
38976 op0 = copy_addr_to_reg (op0);
38978 op0 = gen_rtx_MEM (mode0, op0);
38980 pat = GEN_FCN (icode) (op0);
38985 case IX86_BUILTIN_XSAVE:
38986 case IX86_BUILTIN_XRSTOR:
38987 case IX86_BUILTIN_XSAVE64:
38988 case IX86_BUILTIN_XRSTOR64:
38989 case IX86_BUILTIN_XSAVEOPT:
38990 case IX86_BUILTIN_XSAVEOPT64:
38991 case IX86_BUILTIN_XSAVES:
38992 case IX86_BUILTIN_XRSTORS:
38993 case IX86_BUILTIN_XSAVES64:
38994 case IX86_BUILTIN_XRSTORS64:
38995 case IX86_BUILTIN_XSAVEC:
38996 case IX86_BUILTIN_XSAVEC64:
38997 arg0 = CALL_EXPR_ARG (exp, 0);
38998 arg1 = CALL_EXPR_ARG (exp, 1);
38999 op0 = expand_normal (arg0);
39000 op1 = expand_normal (arg1);
39002 if (!address_operand (op0, VOIDmode))
39004 op0 = convert_memory_address (Pmode, op0);
39005 op0 = copy_addr_to_reg (op0);
39007 op0 = gen_rtx_MEM (BLKmode, op0);
39009 op1 = force_reg (DImode, op1);
39013 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
39014 NULL, 1, OPTAB_DIRECT);
39017 case IX86_BUILTIN_XSAVE:
39018 icode = CODE_FOR_xsave_rex64;
39020 case IX86_BUILTIN_XRSTOR:
39021 icode = CODE_FOR_xrstor_rex64;
39023 case IX86_BUILTIN_XSAVE64:
39024 icode = CODE_FOR_xsave64;
39026 case IX86_BUILTIN_XRSTOR64:
39027 icode = CODE_FOR_xrstor64;
39029 case IX86_BUILTIN_XSAVEOPT:
39030 icode = CODE_FOR_xsaveopt_rex64;
39032 case IX86_BUILTIN_XSAVEOPT64:
39033 icode = CODE_FOR_xsaveopt64;
39035 case IX86_BUILTIN_XSAVES:
39036 icode = CODE_FOR_xsaves_rex64;
39038 case IX86_BUILTIN_XRSTORS:
39039 icode = CODE_FOR_xrstors_rex64;
39041 case IX86_BUILTIN_XSAVES64:
39042 icode = CODE_FOR_xsaves64;
39044 case IX86_BUILTIN_XRSTORS64:
39045 icode = CODE_FOR_xrstors64;
39047 case IX86_BUILTIN_XSAVEC:
39048 icode = CODE_FOR_xsavec_rex64;
39050 case IX86_BUILTIN_XSAVEC64:
39051 icode = CODE_FOR_xsavec64;
39054 gcc_unreachable ();
39057 op2 = gen_lowpart (SImode, op2);
39058 op1 = gen_lowpart (SImode, op1);
39059 pat = GEN_FCN (icode) (op0, op1, op2);
39065 case IX86_BUILTIN_XSAVE:
39066 icode = CODE_FOR_xsave;
39068 case IX86_BUILTIN_XRSTOR:
39069 icode = CODE_FOR_xrstor;
39071 case IX86_BUILTIN_XSAVEOPT:
39072 icode = CODE_FOR_xsaveopt;
39074 case IX86_BUILTIN_XSAVES:
39075 icode = CODE_FOR_xsaves;
39077 case IX86_BUILTIN_XRSTORS:
39078 icode = CODE_FOR_xrstors;
39080 case IX86_BUILTIN_XSAVEC:
39081 icode = CODE_FOR_xsavec;
39084 gcc_unreachable ();
39086 pat = GEN_FCN (icode) (op0, op1);
39093 case IX86_BUILTIN_LLWPCB:
39094 arg0 = CALL_EXPR_ARG (exp, 0);
39095 op0 = expand_normal (arg0);
39096 icode = CODE_FOR_lwp_llwpcb;
39097 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39098 op0 = ix86_zero_extend_to_Pmode (op0);
39099 emit_insn (gen_lwp_llwpcb (op0));
39102 case IX86_BUILTIN_SLWPCB:
39103 icode = CODE_FOR_lwp_slwpcb;
39105 || !insn_data[icode].operand[0].predicate (target, Pmode))
39106 target = gen_reg_rtx (Pmode);
39107 emit_insn (gen_lwp_slwpcb (target));
39110 case IX86_BUILTIN_BEXTRI32:
39111 case IX86_BUILTIN_BEXTRI64:
39112 arg0 = CALL_EXPR_ARG (exp, 0);
39113 arg1 = CALL_EXPR_ARG (exp, 1);
39114 op0 = expand_normal (arg0);
39115 op1 = expand_normal (arg1);
39116 icode = (fcode == IX86_BUILTIN_BEXTRI32
39117 ? CODE_FOR_tbm_bextri_si
39118 : CODE_FOR_tbm_bextri_di);
39119 if (!CONST_INT_P (op1))
39121 error ("last argument must be an immediate");
39126 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
39127 unsigned char lsb_index = INTVAL (op1) & 0xFF;
39128 op1 = GEN_INT (length);
39129 op2 = GEN_INT (lsb_index);
39130 pat = GEN_FCN (icode) (target, op0, op1, op2);
39136 case IX86_BUILTIN_RDRAND16_STEP:
39137 icode = CODE_FOR_rdrandhi_1;
39141 case IX86_BUILTIN_RDRAND32_STEP:
39142 icode = CODE_FOR_rdrandsi_1;
39146 case IX86_BUILTIN_RDRAND64_STEP:
39147 icode = CODE_FOR_rdranddi_1;
39151 op0 = gen_reg_rtx (mode0);
39152 emit_insn (GEN_FCN (icode) (op0));
39154 arg0 = CALL_EXPR_ARG (exp, 0);
39155 op1 = expand_normal (arg0);
39156 if (!address_operand (op1, VOIDmode))
39158 op1 = convert_memory_address (Pmode, op1);
39159 op1 = copy_addr_to_reg (op1);
39161 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39163 op1 = gen_reg_rtx (SImode);
39164 emit_move_insn (op1, CONST1_RTX (SImode));
39166 /* Emit SImode conditional move. */
39167 if (mode0 == HImode)
39169 op2 = gen_reg_rtx (SImode);
39170 emit_insn (gen_zero_extendhisi2 (op2, op0));
39172 else if (mode0 == SImode)
39175 op2 = gen_rtx_SUBREG (SImode, op0, 0);
39178 || !register_operand (target, SImode))
39179 target = gen_reg_rtx (SImode);
39181 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
39183 emit_insn (gen_rtx_SET (VOIDmode, target,
39184 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
39187 case IX86_BUILTIN_RDSEED16_STEP:
39188 icode = CODE_FOR_rdseedhi_1;
39192 case IX86_BUILTIN_RDSEED32_STEP:
39193 icode = CODE_FOR_rdseedsi_1;
39197 case IX86_BUILTIN_RDSEED64_STEP:
39198 icode = CODE_FOR_rdseeddi_1;
39202 op0 = gen_reg_rtx (mode0);
39203 emit_insn (GEN_FCN (icode) (op0));
39205 arg0 = CALL_EXPR_ARG (exp, 0);
39206 op1 = expand_normal (arg0);
39207 if (!address_operand (op1, VOIDmode))
39209 op1 = convert_memory_address (Pmode, op1);
39210 op1 = copy_addr_to_reg (op1);
39212 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39214 op2 = gen_reg_rtx (QImode);
39216 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
39218 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
39221 || !register_operand (target, SImode))
39222 target = gen_reg_rtx (SImode);
39224 emit_insn (gen_zero_extendqisi2 (target, op2));
39227 case IX86_BUILTIN_SBB32:
39228 icode = CODE_FOR_subsi3_carry;
39232 case IX86_BUILTIN_SBB64:
39233 icode = CODE_FOR_subdi3_carry;
39237 case IX86_BUILTIN_ADDCARRYX32:
39238 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
39242 case IX86_BUILTIN_ADDCARRYX64:
39243 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
39247 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
39248 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
39249 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
39250 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
39252 op0 = gen_reg_rtx (QImode);
39254 /* Generate CF from input operand. */
39255 op1 = expand_normal (arg0);
39256 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
39257 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
39259 /* Gen ADCX instruction to compute X+Y+CF. */
39260 op2 = expand_normal (arg1);
39261 op3 = expand_normal (arg2);
39264 op2 = copy_to_mode_reg (mode0, op2);
39266 op3 = copy_to_mode_reg (mode0, op3);
39268 op0 = gen_reg_rtx (mode0);
39270 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
39271 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
39272 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
39274 /* Store the result. */
39275 op4 = expand_normal (arg3);
39276 if (!address_operand (op4, VOIDmode))
39278 op4 = convert_memory_address (Pmode, op4);
39279 op4 = copy_addr_to_reg (op4);
39281 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39283 /* Return current CF value. */
39285 target = gen_reg_rtx (QImode);
39287 PUT_MODE (pat, QImode);
39288 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
39291 case IX86_BUILTIN_READ_FLAGS:
39292 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39295 || target == NULL_RTX
39296 || !nonimmediate_operand (target, word_mode)
39297 || GET_MODE (target) != word_mode)
39298 target = gen_reg_rtx (word_mode);
39300 emit_insn (gen_pop (target));
39303 case IX86_BUILTIN_WRITE_FLAGS:
39305 arg0 = CALL_EXPR_ARG (exp, 0);
39306 op0 = expand_normal (arg0);
39307 if (!general_no_elim_operand (op0, word_mode))
39308 op0 = copy_to_mode_reg (word_mode, op0);
39310 emit_insn (gen_push (op0));
39311 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39314 case IX86_BUILTIN_KORTESTC16:
39315 icode = CODE_FOR_kortestchi;
39320 case IX86_BUILTIN_KORTESTZ16:
39321 icode = CODE_FOR_kortestzhi;
39326 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
39327 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
39328 op0 = expand_normal (arg0);
39329 op1 = expand_normal (arg1);
39331 op0 = copy_to_reg (op0);
39332 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39333 op1 = copy_to_reg (op1);
39334 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39336 target = gen_reg_rtx (QImode);
39337 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
39339 /* Emit kortest. */
39340 emit_insn (GEN_FCN (icode) (op0, op1));
39341 /* And use setcc to return result from flags. */
39342 ix86_expand_setcc (target, EQ,
39343 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39346 case IX86_BUILTIN_GATHERSIV2DF:
39347 icode = CODE_FOR_avx2_gathersiv2df;
39349 case IX86_BUILTIN_GATHERSIV4DF:
39350 icode = CODE_FOR_avx2_gathersiv4df;
39352 case IX86_BUILTIN_GATHERDIV2DF:
39353 icode = CODE_FOR_avx2_gatherdiv2df;
39355 case IX86_BUILTIN_GATHERDIV4DF:
39356 icode = CODE_FOR_avx2_gatherdiv4df;
39358 case IX86_BUILTIN_GATHERSIV4SF:
39359 icode = CODE_FOR_avx2_gathersiv4sf;
39361 case IX86_BUILTIN_GATHERSIV8SF:
39362 icode = CODE_FOR_avx2_gathersiv8sf;
39364 case IX86_BUILTIN_GATHERDIV4SF:
39365 icode = CODE_FOR_avx2_gatherdiv4sf;
39367 case IX86_BUILTIN_GATHERDIV8SF:
39368 icode = CODE_FOR_avx2_gatherdiv8sf;
39370 case IX86_BUILTIN_GATHERSIV2DI:
39371 icode = CODE_FOR_avx2_gathersiv2di;
39373 case IX86_BUILTIN_GATHERSIV4DI:
39374 icode = CODE_FOR_avx2_gathersiv4di;
39376 case IX86_BUILTIN_GATHERDIV2DI:
39377 icode = CODE_FOR_avx2_gatherdiv2di;
39379 case IX86_BUILTIN_GATHERDIV4DI:
39380 icode = CODE_FOR_avx2_gatherdiv4di;
39382 case IX86_BUILTIN_GATHERSIV4SI:
39383 icode = CODE_FOR_avx2_gathersiv4si;
39385 case IX86_BUILTIN_GATHERSIV8SI:
39386 icode = CODE_FOR_avx2_gathersiv8si;
39388 case IX86_BUILTIN_GATHERDIV4SI:
39389 icode = CODE_FOR_avx2_gatherdiv4si;
39391 case IX86_BUILTIN_GATHERDIV8SI:
39392 icode = CODE_FOR_avx2_gatherdiv8si;
39394 case IX86_BUILTIN_GATHERALTSIV4DF:
39395 icode = CODE_FOR_avx2_gathersiv4df;
39397 case IX86_BUILTIN_GATHERALTDIV8SF:
39398 icode = CODE_FOR_avx2_gatherdiv8sf;
39400 case IX86_BUILTIN_GATHERALTSIV4DI:
39401 icode = CODE_FOR_avx2_gathersiv4di;
39403 case IX86_BUILTIN_GATHERALTDIV8SI:
39404 icode = CODE_FOR_avx2_gatherdiv8si;
39406 case IX86_BUILTIN_GATHER3SIV16SF:
39407 icode = CODE_FOR_avx512f_gathersiv16sf;
39409 case IX86_BUILTIN_GATHER3SIV8DF:
39410 icode = CODE_FOR_avx512f_gathersiv8df;
39412 case IX86_BUILTIN_GATHER3DIV16SF:
39413 icode = CODE_FOR_avx512f_gatherdiv16sf;
39415 case IX86_BUILTIN_GATHER3DIV8DF:
39416 icode = CODE_FOR_avx512f_gatherdiv8df;
39418 case IX86_BUILTIN_GATHER3SIV16SI:
39419 icode = CODE_FOR_avx512f_gathersiv16si;
39421 case IX86_BUILTIN_GATHER3SIV8DI:
39422 icode = CODE_FOR_avx512f_gathersiv8di;
39424 case IX86_BUILTIN_GATHER3DIV16SI:
39425 icode = CODE_FOR_avx512f_gatherdiv16si;
39427 case IX86_BUILTIN_GATHER3DIV8DI:
39428 icode = CODE_FOR_avx512f_gatherdiv8di;
39430 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39431 icode = CODE_FOR_avx512f_gathersiv8df;
39433 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39434 icode = CODE_FOR_avx512f_gatherdiv16sf;
39436 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39437 icode = CODE_FOR_avx512f_gathersiv8di;
39439 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39440 icode = CODE_FOR_avx512f_gatherdiv16si;
39442 case IX86_BUILTIN_GATHER3SIV2DF:
39443 icode = CODE_FOR_avx512vl_gathersiv2df;
39445 case IX86_BUILTIN_GATHER3SIV4DF:
39446 icode = CODE_FOR_avx512vl_gathersiv4df;
39448 case IX86_BUILTIN_GATHER3DIV2DF:
39449 icode = CODE_FOR_avx512vl_gatherdiv2df;
39451 case IX86_BUILTIN_GATHER3DIV4DF:
39452 icode = CODE_FOR_avx512vl_gatherdiv4df;
39454 case IX86_BUILTIN_GATHER3SIV4SF:
39455 icode = CODE_FOR_avx512vl_gathersiv4sf;
39457 case IX86_BUILTIN_GATHER3SIV8SF:
39458 icode = CODE_FOR_avx512vl_gathersiv8sf;
39460 case IX86_BUILTIN_GATHER3DIV4SF:
39461 icode = CODE_FOR_avx512vl_gatherdiv4sf;
39463 case IX86_BUILTIN_GATHER3DIV8SF:
39464 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39466 case IX86_BUILTIN_GATHER3SIV2DI:
39467 icode = CODE_FOR_avx512vl_gathersiv2di;
39469 case IX86_BUILTIN_GATHER3SIV4DI:
39470 icode = CODE_FOR_avx512vl_gathersiv4di;
39472 case IX86_BUILTIN_GATHER3DIV2DI:
39473 icode = CODE_FOR_avx512vl_gatherdiv2di;
39475 case IX86_BUILTIN_GATHER3DIV4DI:
39476 icode = CODE_FOR_avx512vl_gatherdiv4di;
39478 case IX86_BUILTIN_GATHER3SIV4SI:
39479 icode = CODE_FOR_avx512vl_gathersiv4si;
39481 case IX86_BUILTIN_GATHER3SIV8SI:
39482 icode = CODE_FOR_avx512vl_gathersiv8si;
39484 case IX86_BUILTIN_GATHER3DIV4SI:
39485 icode = CODE_FOR_avx512vl_gatherdiv4si;
39487 case IX86_BUILTIN_GATHER3DIV8SI:
39488 icode = CODE_FOR_avx512vl_gatherdiv8si;
39490 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39491 icode = CODE_FOR_avx512vl_gathersiv4df;
39493 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39494 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39496 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39497 icode = CODE_FOR_avx512vl_gathersiv4di;
39499 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39500 icode = CODE_FOR_avx512vl_gatherdiv8si;
39502 case IX86_BUILTIN_SCATTERSIV16SF:
39503 icode = CODE_FOR_avx512f_scattersiv16sf;
39505 case IX86_BUILTIN_SCATTERSIV8DF:
39506 icode = CODE_FOR_avx512f_scattersiv8df;
39508 case IX86_BUILTIN_SCATTERDIV16SF:
39509 icode = CODE_FOR_avx512f_scatterdiv16sf;
39511 case IX86_BUILTIN_SCATTERDIV8DF:
39512 icode = CODE_FOR_avx512f_scatterdiv8df;
39514 case IX86_BUILTIN_SCATTERSIV16SI:
39515 icode = CODE_FOR_avx512f_scattersiv16si;
39517 case IX86_BUILTIN_SCATTERSIV8DI:
39518 icode = CODE_FOR_avx512f_scattersiv8di;
39520 case IX86_BUILTIN_SCATTERDIV16SI:
39521 icode = CODE_FOR_avx512f_scatterdiv16si;
39523 case IX86_BUILTIN_SCATTERDIV8DI:
39524 icode = CODE_FOR_avx512f_scatterdiv8di;
39526 case IX86_BUILTIN_SCATTERSIV8SF:
39527 icode = CODE_FOR_avx512vl_scattersiv8sf;
39529 case IX86_BUILTIN_SCATTERSIV4SF:
39530 icode = CODE_FOR_avx512vl_scattersiv4sf;
39532 case IX86_BUILTIN_SCATTERSIV4DF:
39533 icode = CODE_FOR_avx512vl_scattersiv4df;
39535 case IX86_BUILTIN_SCATTERSIV2DF:
39536 icode = CODE_FOR_avx512vl_scattersiv2df;
39538 case IX86_BUILTIN_SCATTERDIV8SF:
39539 icode = CODE_FOR_avx512vl_scatterdiv8sf;
39541 case IX86_BUILTIN_SCATTERDIV4SF:
39542 icode = CODE_FOR_avx512vl_scatterdiv4sf;
39544 case IX86_BUILTIN_SCATTERDIV4DF:
39545 icode = CODE_FOR_avx512vl_scatterdiv4df;
39547 case IX86_BUILTIN_SCATTERDIV2DF:
39548 icode = CODE_FOR_avx512vl_scatterdiv2df;
39550 case IX86_BUILTIN_SCATTERSIV8SI:
39551 icode = CODE_FOR_avx512vl_scattersiv8si;
39553 case IX86_BUILTIN_SCATTERSIV4SI:
39554 icode = CODE_FOR_avx512vl_scattersiv4si;
39556 case IX86_BUILTIN_SCATTERSIV4DI:
39557 icode = CODE_FOR_avx512vl_scattersiv4di;
39559 case IX86_BUILTIN_SCATTERSIV2DI:
39560 icode = CODE_FOR_avx512vl_scattersiv2di;
39562 case IX86_BUILTIN_SCATTERDIV8SI:
39563 icode = CODE_FOR_avx512vl_scatterdiv8si;
39565 case IX86_BUILTIN_SCATTERDIV4SI:
39566 icode = CODE_FOR_avx512vl_scatterdiv4si;
39568 case IX86_BUILTIN_SCATTERDIV4DI:
39569 icode = CODE_FOR_avx512vl_scatterdiv4di;
39571 case IX86_BUILTIN_SCATTERDIV2DI:
39572 icode = CODE_FOR_avx512vl_scatterdiv2di;
39574 case IX86_BUILTIN_GATHERPFDPD:
39575 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39576 goto vec_prefetch_gen;
39577 case IX86_BUILTIN_GATHERPFDPS:
39578 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39579 goto vec_prefetch_gen;
39580 case IX86_BUILTIN_GATHERPFQPD:
39581 icode = CODE_FOR_avx512pf_gatherpfv8didf;
39582 goto vec_prefetch_gen;
39583 case IX86_BUILTIN_GATHERPFQPS:
39584 icode = CODE_FOR_avx512pf_gatherpfv8disf;
39585 goto vec_prefetch_gen;
39586 case IX86_BUILTIN_SCATTERPFDPD:
39587 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39588 goto vec_prefetch_gen;
39589 case IX86_BUILTIN_SCATTERPFDPS:
39590 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39591 goto vec_prefetch_gen;
39592 case IX86_BUILTIN_SCATTERPFQPD:
39593 icode = CODE_FOR_avx512pf_scatterpfv8didf;
39594 goto vec_prefetch_gen;
39595 case IX86_BUILTIN_SCATTERPFQPS:
39596 icode = CODE_FOR_avx512pf_scatterpfv8disf;
39597 goto vec_prefetch_gen;
39601 rtx (*gen) (rtx, rtx);
39603 arg0 = CALL_EXPR_ARG (exp, 0);
39604 arg1 = CALL_EXPR_ARG (exp, 1);
39605 arg2 = CALL_EXPR_ARG (exp, 2);
39606 arg3 = CALL_EXPR_ARG (exp, 3);
39607 arg4 = CALL_EXPR_ARG (exp, 4);
39608 op0 = expand_normal (arg0);
39609 op1 = expand_normal (arg1);
39610 op2 = expand_normal (arg2);
39611 op3 = expand_normal (arg3);
39612 op4 = expand_normal (arg4);
39613 /* Note the arg order is different from the operand order. */
39614 mode0 = insn_data[icode].operand[1].mode;
39615 mode2 = insn_data[icode].operand[3].mode;
39616 mode3 = insn_data[icode].operand[4].mode;
39617 mode4 = insn_data[icode].operand[5].mode;
39619 if (target == NULL_RTX
39620 || GET_MODE (target) != insn_data[icode].operand[0].mode
39621 || !insn_data[icode].operand[0].predicate (target,
39622 GET_MODE (target)))
39623 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
39625 subtarget = target;
39629 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39630 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39631 half = gen_reg_rtx (V8SImode);
39632 if (!nonimmediate_operand (op2, V16SImode))
39633 op2 = copy_to_mode_reg (V16SImode, op2);
39634 emit_insn (gen_vec_extract_lo_v16si (half, op2));
39637 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39638 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39639 case IX86_BUILTIN_GATHERALTSIV4DF:
39640 case IX86_BUILTIN_GATHERALTSIV4DI:
39641 half = gen_reg_rtx (V4SImode);
39642 if (!nonimmediate_operand (op2, V8SImode))
39643 op2 = copy_to_mode_reg (V8SImode, op2);
39644 emit_insn (gen_vec_extract_lo_v8si (half, op2));
39647 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39648 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39649 half = gen_reg_rtx (mode0);
39650 if (mode0 == V8SFmode)
39651 gen = gen_vec_extract_lo_v16sf;
39653 gen = gen_vec_extract_lo_v16si;
39654 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39655 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39656 emit_insn (gen (half, op0));
39658 if (GET_MODE (op3) != VOIDmode)
39660 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39661 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39662 emit_insn (gen (half, op3));
39666 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39667 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39668 case IX86_BUILTIN_GATHERALTDIV8SF:
39669 case IX86_BUILTIN_GATHERALTDIV8SI:
39670 half = gen_reg_rtx (mode0);
39671 if (mode0 == V4SFmode)
39672 gen = gen_vec_extract_lo_v8sf;
39674 gen = gen_vec_extract_lo_v8si;
39675 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39676 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39677 emit_insn (gen (half, op0));
39679 if (GET_MODE (op3) != VOIDmode)
39681 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39682 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39683 emit_insn (gen (half, op3));
39691 /* Force memory operand only with base register here. But we
39692 don't want to do it on memory operand for other builtin
39694 op1 = ix86_zero_extend_to_Pmode (op1);
39696 if (!insn_data[icode].operand[1].predicate (op0, mode0))
39697 op0 = copy_to_mode_reg (mode0, op0);
39698 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
39699 op1 = copy_to_mode_reg (Pmode, op1);
39700 if (!insn_data[icode].operand[3].predicate (op2, mode2))
39701 op2 = copy_to_mode_reg (mode2, op2);
39702 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
39704 if (!insn_data[icode].operand[4].predicate (op3, mode3))
39705 op3 = copy_to_mode_reg (mode3, op3);
39709 op3 = copy_to_reg (op3);
39710 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
39712 if (!insn_data[icode].operand[5].predicate (op4, mode4))
39714 error ("the last argument must be scale 1, 2, 4, 8");
39718 /* Optimize. If mask is known to have all high bits set,
39719 replace op0 with pc_rtx to signal that the instruction
39720 overwrites the whole destination and doesn't use its
39721 previous contents. */
39724 if (TREE_CODE (arg3) == INTEGER_CST)
39726 if (integer_all_onesp (arg3))
39729 else if (TREE_CODE (arg3) == VECTOR_CST)
39731 unsigned int negative = 0;
39732 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
39734 tree cst = VECTOR_CST_ELT (arg3, i);
39735 if (TREE_CODE (cst) == INTEGER_CST
39736 && tree_int_cst_sign_bit (cst))
39738 else if (TREE_CODE (cst) == REAL_CST
39739 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
39742 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
39745 else if (TREE_CODE (arg3) == SSA_NAME
39746 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
39748 /* Recognize also when mask is like:
39749 __v2df src = _mm_setzero_pd ();
39750 __v2df mask = _mm_cmpeq_pd (src, src);
39752 __v8sf src = _mm256_setzero_ps ();
39753 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
39754 as that is a cheaper way to load all ones into
39755 a register than having to load a constant from
39757 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
39758 if (is_gimple_call (def_stmt))
39760 tree fndecl = gimple_call_fndecl (def_stmt);
39762 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
39763 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
39765 case IX86_BUILTIN_CMPPD:
39766 case IX86_BUILTIN_CMPPS:
39767 case IX86_BUILTIN_CMPPD256:
39768 case IX86_BUILTIN_CMPPS256:
39769 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
39772 case IX86_BUILTIN_CMPEQPD:
39773 case IX86_BUILTIN_CMPEQPS:
39774 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
39775 && initializer_zerop (gimple_call_arg (def_stmt,
39786 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
39793 case IX86_BUILTIN_GATHER3DIV16SF:
39794 if (target == NULL_RTX)
39795 target = gen_reg_rtx (V8SFmode);
39796 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
39798 case IX86_BUILTIN_GATHER3DIV16SI:
39799 if (target == NULL_RTX)
39800 target = gen_reg_rtx (V8SImode);
39801 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
39803 case IX86_BUILTIN_GATHER3DIV8SF:
39804 case IX86_BUILTIN_GATHERDIV8SF:
39805 if (target == NULL_RTX)
39806 target = gen_reg_rtx (V4SFmode);
39807 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
39809 case IX86_BUILTIN_GATHER3DIV8SI:
39810 case IX86_BUILTIN_GATHERDIV8SI:
39811 if (target == NULL_RTX)
39812 target = gen_reg_rtx (V4SImode);
39813 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
39816 target = subtarget;
39822 arg0 = CALL_EXPR_ARG (exp, 0);
39823 arg1 = CALL_EXPR_ARG (exp, 1);
39824 arg2 = CALL_EXPR_ARG (exp, 2);
39825 arg3 = CALL_EXPR_ARG (exp, 3);
39826 arg4 = CALL_EXPR_ARG (exp, 4);
39827 op0 = expand_normal (arg0);
39828 op1 = expand_normal (arg1);
39829 op2 = expand_normal (arg2);
39830 op3 = expand_normal (arg3);
39831 op4 = expand_normal (arg4);
39832 mode1 = insn_data[icode].operand[1].mode;
39833 mode2 = insn_data[icode].operand[2].mode;
39834 mode3 = insn_data[icode].operand[3].mode;
39835 mode4 = insn_data[icode].operand[4].mode;
39837 /* Force memory operand only with base register here. But we
39838 don't want to do it on memory operand for other builtin
39840 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
39842 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39843 op0 = copy_to_mode_reg (Pmode, op0);
39845 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
39847 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39848 op1 = copy_to_mode_reg (mode1, op1);
39852 op1 = copy_to_reg (op1);
39853 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
39856 if (!insn_data[icode].operand[2].predicate (op2, mode2))
39857 op2 = copy_to_mode_reg (mode2, op2);
39859 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39860 op3 = copy_to_mode_reg (mode3, op3);
39862 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39864 error ("the last argument must be scale 1, 2, 4, 8");
39868 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
39876 arg0 = CALL_EXPR_ARG (exp, 0);
39877 arg1 = CALL_EXPR_ARG (exp, 1);
39878 arg2 = CALL_EXPR_ARG (exp, 2);
39879 arg3 = CALL_EXPR_ARG (exp, 3);
39880 arg4 = CALL_EXPR_ARG (exp, 4);
39881 op0 = expand_normal (arg0);
39882 op1 = expand_normal (arg1);
39883 op2 = expand_normal (arg2);
39884 op3 = expand_normal (arg3);
39885 op4 = expand_normal (arg4);
39886 mode0 = insn_data[icode].operand[0].mode;
39887 mode1 = insn_data[icode].operand[1].mode;
39888 mode3 = insn_data[icode].operand[3].mode;
39889 mode4 = insn_data[icode].operand[4].mode;
39891 if (GET_MODE (op0) == mode0
39892 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
39894 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39895 op0 = copy_to_mode_reg (mode0, op0);
39897 else if (op0 != constm1_rtx)
39899 op0 = copy_to_reg (op0);
39900 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39903 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39904 op1 = copy_to_mode_reg (mode1, op1);
39906 /* Force memory operand only with base register here. But we
39907 don't want to do it on memory operand for other builtin
39909 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
39911 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
39912 op2 = copy_to_mode_reg (Pmode, op2);
39914 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39916 error ("the forth argument must be scale 1, 2, 4, 8");
39920 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39922 error ("incorrect hint operand");
39926 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
39934 case IX86_BUILTIN_XABORT:
39935 icode = CODE_FOR_xabort;
39936 arg0 = CALL_EXPR_ARG (exp, 0);
39937 op0 = expand_normal (arg0);
39938 mode0 = insn_data[icode].operand[0].mode;
39939 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39941 error ("the xabort's argument must be an 8-bit immediate");
39944 emit_insn (gen_xabort (op0));
39951 for (i = 0, d = bdesc_special_args;
39952 i < ARRAY_SIZE (bdesc_special_args);
39954 if (d->code == fcode)
39955 return ix86_expand_special_args_builtin (d, exp, target);
39957 for (i = 0, d = bdesc_args;
39958 i < ARRAY_SIZE (bdesc_args);
39960 if (d->code == fcode)
39963 case IX86_BUILTIN_FABSQ:
39964 case IX86_BUILTIN_COPYSIGNQ:
39966 /* Emit a normal call if SSE isn't available. */
39967 return expand_call (exp, target, ignore);
39969 return ix86_expand_args_builtin (d, exp, target);
39972 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
39973 if (d->code == fcode)
39974 return ix86_expand_sse_comi (d, exp, target);
39976 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
39977 if (d->code == fcode)
39978 return ix86_expand_round_builtin (d, exp, target);
39980 for (i = 0, d = bdesc_pcmpestr;
39981 i < ARRAY_SIZE (bdesc_pcmpestr);
39983 if (d->code == fcode)
39984 return ix86_expand_sse_pcmpestr (d, exp, target);
39986 for (i = 0, d = bdesc_pcmpistr;
39987 i < ARRAY_SIZE (bdesc_pcmpistr);
39989 if (d->code == fcode)
39990 return ix86_expand_sse_pcmpistr (d, exp, target);
39992 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
39993 if (d->code == fcode)
39994 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
39995 (enum ix86_builtin_func_type)
39996 d->flag, d->comparison);
39998 gcc_unreachable ();
40001 /* This returns the target-specific builtin with code CODE if
40002 current_function_decl has visibility on this builtin, which is checked
40003 using isa flags. Returns NULL_TREE otherwise. */
40005 static tree ix86_get_builtin (enum ix86_builtins code)
40007 struct cl_target_option *opts;
40008 tree target_tree = NULL_TREE;
40010 /* Determine the isa flags of current_function_decl. */
40012 if (current_function_decl)
40013 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
/* No per-function target attribute: fall back to the command-line
   (default) target options node.  */
40015 if (target_tree == NULL)
40016 target_tree = target_option_default_node;
40018 opts = TREE_TARGET_OPTION (target_tree);
/* The builtin is usable only if at least one of the ISA bits recorded
   for it in ix86_builtins_isa[] is enabled for the current function.  */
40020 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
40021 return ix86_builtin_decl (code, true);
40026 /* Return function decl for target specific builtin
40027 for given MPX builtin passed in FCODE. */
40029 ix86_builtin_mpx_function (unsigned fcode)
/* 1:1 mapping from the generic Pointer Bounds Checker builtins to the
   IA-32 MPX builtin decls recorded in ix86_builtins[].  */
40033 case BUILT_IN_CHKP_BNDMK:
40034 return ix86_builtins[IX86_BUILTIN_BNDMK];
40036 case BUILT_IN_CHKP_BNDSTX:
40037 return ix86_builtins[IX86_BUILTIN_BNDSTX];
40039 case BUILT_IN_CHKP_BNDLDX:
40040 return ix86_builtins[IX86_BUILTIN_BNDLDX];
40042 case BUILT_IN_CHKP_BNDCL:
40043 return ix86_builtins[IX86_BUILTIN_BNDCL];
40045 case BUILT_IN_CHKP_BNDCU:
40046 return ix86_builtins[IX86_BUILTIN_BNDCU];
40048 case BUILT_IN_CHKP_BNDRET:
40049 return ix86_builtins[IX86_BUILTIN_BNDRET];
40051 case BUILT_IN_CHKP_INTERSECT:
40052 return ix86_builtins[IX86_BUILTIN_BNDINT];
40054 case BUILT_IN_CHKP_NARROW:
40055 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
40057 case BUILT_IN_CHKP_SIZEOF:
40058 return ix86_builtins[IX86_BUILTIN_SIZEOF];
40060 case BUILT_IN_CHKP_EXTRACT_LOWER:
40061 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
40063 case BUILT_IN_CHKP_EXTRACT_UPPER:
40064 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
/* Every CHKP builtin must be handled above.  */
40070 gcc_unreachable ();
40073 /* Helper function for ix86_load_bounds and ix86_store_bounds.
40075 Return an address to be used to load/store bounds for pointer
40078 SLOT_NO is an integer constant holding number of a target
40079 dependent special slot to be used in case SLOT is not a memory.
40081 SPECIAL_BASE is a pointer to be used as a base of fake address
40082 to access special slots in Bounds Table. SPECIAL_BASE[-1],
40083 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
40086 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
40090 /* NULL slot means we pass bounds for pointer not passed to the
40091 function at all. Register slot means we pass pointer in a
40092 register. In both these cases bounds are passed via Bounds
40093 Table. Since we do not have actual pointer stored in memory,
40094 we have to use fake addresses to access Bounds Table. We
40095 start with (special_base - sizeof (void*)) and decrease this
40096 address by pointer size to get addresses for other slots. */
40097 if (!slot || REG_P (slot))
40099 gcc_assert (CONST_INT_P (slot_no));
40100 addr = plus_constant (Pmode, special_base,
40101 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
40103 /* If pointer is passed in a memory then its address is used to
40104 access Bounds Table. */
40105 else if (MEM_P (slot))
40107 addr = XEXP (slot, 0);
/* The bndldx/bndstx patterns want a register base, so force the
   address into a register if it is not one already.  */
40108 if (!register_operand (addr, Pmode))
40109 addr = copy_addr_to_reg (addr);
/* SLOT must be NULL, a REG or a MEM; anything else is a caller bug.  */
40112 gcc_unreachable ();
40117 /* Expand pass uses this hook to load bounds for function parameter
40118 PTR passed in SLOT in case its bounds are not passed in a register.
40120 If SLOT is a memory, then bounds are loaded as for regular pointer
40121 loaded from memory. PTR may be NULL in case SLOT is a memory.
40122 In such case value of PTR (if required) may be loaded from SLOT.
40124 If SLOT is NULL or a register then SLOT_NO is an integer constant
40125 holding number of the target dependent special slot which should be
40126 used to obtain bounds.
40128 Return loaded bounds. */
40131 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
40133 rtx reg = gen_reg_rtx (BNDmode)%;
40136 /* Get address to be used to access Bounds Table. Special slots start
40137 at the location of return address of the current function. */
40138 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
40140 /* Load pointer value from a memory if we don't have it. */
40143 gcc_assert (MEM_P (slot));
40144 ptr = copy_addr_to_reg (slot);
/* BNDmode picks between the 64-bit and 32-bit forms of bndldx.  */
40147 emit_insn (BNDmode == BND64mode
40148 ? gen_bnd64_ldx (reg, addr, ptr)
40149 : gen_bnd32_ldx (reg, addr, ptr));
40154 /* Expand pass uses this hook to store BOUNDS for call argument PTR
40155 passed in SLOT in case BOUNDS are not passed in a register.
40157 If SLOT is a memory, then BOUNDS are stored as for regular pointer
40158 stored in memory. PTR may be NULL in case SLOT is a memory.
40159 In such case value of PTR (if required) may be loaded from SLOT.
40161 If SLOT is NULL or a register then SLOT_NO is an integer constant
40162 holding number of the target dependent special slot which should be
40163 used to store BOUNDS. */
40166 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
40170 /* Get address to be used to access Bounds Table. Special slots start
40171 at the location of return address of a called function. */
40172 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
40174 /* Load pointer value from a memory if we don't have it. */
40177 gcc_assert (MEM_P (slot));
40178 ptr = copy_addr_to_reg (slot);
/* The bndstx pattern requires BOUNDS in a bound register.  */
40181 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
40182 if (!register_operand (bounds, BNDmode))
40183 bounds = copy_to_mode_reg (BNDmode, bounds);
/* BNDmode picks between the 64-bit and 32-bit forms of bndstx.  */
40185 emit_insn (BNDmode == BND64mode
40186 ? gen_bnd64_stx (addr, ptr, bounds)
40187 : gen_bnd32_stx (addr, ptr, bounds));
40190 /* Load and return bounds returned by function in SLOT. */
40193 ix86_load_returned_bounds (rtx slot)
/* Returned bounds always arrive in a bound register.  */
40197 gcc_assert (REG_P (slot));
40198 res = gen_reg_rtx (BNDmode);
/* Copy SLOT out into a fresh BNDmode pseudo.  */
40199 emit_move_insn (res, slot);
40204 /* Store BOUNDS returned by function into SLOT. */
40207 ix86_store_returned_bounds (rtx slot, rtx bounds)
/* The return slot for bounds is always a (bound) register.  */
40209 gcc_assert (REG_P (slot));
40210 emit_move_insn (slot, bounds);
40213 /* Returns a function decl for a vectorized version of the builtin function
40214 with builtin function code FN and the result vector type TYPE, or NULL_TREE
40215 if it is not available. */
40218 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
40221 machine_mode in_mode, out_mode;
40223 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
/* Only ordinary builtins with vector in/out types can be vectorized
   here.  */
40225 if (TREE_CODE (type_out) != VECTOR_TYPE
40226 || TREE_CODE (type_in) != VECTOR_TYPE
40227 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
40230 out_mode = TYPE_MODE (TREE_TYPE (type_out));
40231 out_n = TYPE_VECTOR_SUBPARTS (type_out);
40232 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40233 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Select the vector builtin whose element mode and lane counts match
   TYPE_OUT/TYPE_IN.  Unhandled combinations fall through to the
   vectorization-library handler at the bottom.  */
40237 case BUILT_IN_SQRT:
40238 if (out_mode == DFmode && in_mode == DFmode)
40240 if (out_n == 2 && in_n == 2)
40241 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40242 else if (out_n == 4 && in_n == 4)
40243 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40244 else if (out_n == 8 && in_n == 8)
40245 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40249 case BUILT_IN_EXP2F:
40250 if (out_mode == SFmode && in_mode == SFmode)
40252 if (out_n == 16 && in_n == 16)
40253 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40257 case BUILT_IN_SQRTF:
40258 if (out_mode == SFmode && in_mode == SFmode)
40260 if (out_n == 4 && in_n == 4)
40261 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40262 else if (out_n == 8 && in_n == 8)
40263 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40264 else if (out_n == 16 && in_n == 16)
40265 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
/* floor/ceil/rint/round families map to the SSE4.1 ROUND insns,
   hence the TARGET_ROUND guard on each of them below.  */
40269 case BUILT_IN_IFLOOR:
40270 case BUILT_IN_LFLOOR:
40271 case BUILT_IN_LLFLOOR:
40272 /* The round insn does not trap on denormals. */
40273 if (flag_trapping_math || !TARGET_ROUND)
40276 if (out_mode == SImode && in_mode == DFmode)
40278 if (out_n == 4 && in_n == 2)
40279 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40280 else if (out_n == 8 && in_n == 4)
40281 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40282 else if (out_n == 16 && in_n == 8)
40283 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40287 case BUILT_IN_IFLOORF:
40288 case BUILT_IN_LFLOORF:
40289 case BUILT_IN_LLFLOORF:
40290 /* The round insn does not trap on denormals. */
40291 if (flag_trapping_math || !TARGET_ROUND)
40294 if (out_mode == SImode && in_mode == SFmode)
40296 if (out_n == 4 && in_n == 4)
40297 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40298 else if (out_n == 8 && in_n == 8)
40299 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40303 case BUILT_IN_ICEIL:
40304 case BUILT_IN_LCEIL:
40305 case BUILT_IN_LLCEIL:
40306 /* The round insn does not trap on denormals. */
40307 if (flag_trapping_math || !TARGET_ROUND)
40310 if (out_mode == SImode && in_mode == DFmode)
40312 if (out_n == 4 && in_n == 2)
40313 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40314 else if (out_n == 8 && in_n == 4)
40315 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40316 else if (out_n == 16 && in_n == 8)
40317 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40321 case BUILT_IN_ICEILF:
40322 case BUILT_IN_LCEILF:
40323 case BUILT_IN_LLCEILF:
40324 /* The round insn does not trap on denormals. */
40325 if (flag_trapping_math || !TARGET_ROUND)
40328 if (out_mode == SImode && in_mode == SFmode)
40330 if (out_n == 4 && in_n == 4)
40331 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40332 else if (out_n == 8 && in_n == 8)
40333 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
/* lrint needs no TARGET_ROUND: cvtpd2dq/cvtps2dq suffice.  */
40337 case BUILT_IN_IRINT:
40338 case BUILT_IN_LRINT:
40339 case BUILT_IN_LLRINT:
40340 if (out_mode == SImode && in_mode == DFmode)
40342 if (out_n == 4 && in_n == 2)
40343 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40344 else if (out_n == 8 && in_n == 4)
40345 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40349 case BUILT_IN_IRINTF:
40350 case BUILT_IN_LRINTF:
40351 case BUILT_IN_LLRINTF:
40352 if (out_mode == SImode && in_mode == SFmode)
40354 if (out_n == 4 && in_n == 4)
40355 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40356 else if (out_n == 8 && in_n == 8)
40357 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40361 case BUILT_IN_IROUND:
40362 case BUILT_IN_LROUND:
40363 case BUILT_IN_LLROUND:
40364 /* The round insn does not trap on denormals. */
40365 if (flag_trapping_math || !TARGET_ROUND)
40368 if (out_mode == SImode && in_mode == DFmode)
40370 if (out_n == 4 && in_n == 2)
40371 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40372 else if (out_n == 8 && in_n == 4)
40373 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40374 else if (out_n == 16 && in_n == 8)
40375 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40379 case BUILT_IN_IROUNDF:
40380 case BUILT_IN_LROUNDF:
40381 case BUILT_IN_LLROUNDF:
40382 /* The round insn does not trap on denormals. */
40383 if (flag_trapping_math || !TARGET_ROUND)
40386 if (out_mode == SImode && in_mode == SFmode)
40388 if (out_n == 4 && in_n == 4)
40389 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40390 else if (out_n == 8 && in_n == 8)
40391 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40395 case BUILT_IN_COPYSIGN:
40396 if (out_mode == DFmode && in_mode == DFmode)
40398 if (out_n == 2 && in_n == 2)
40399 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40400 else if (out_n == 4 && in_n == 4)
40401 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40402 else if (out_n == 8 && in_n == 8)
40403 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40407 case BUILT_IN_COPYSIGNF:
40408 if (out_mode == SFmode && in_mode == SFmode)
40410 if (out_n == 4 && in_n == 4)
40411 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40412 else if (out_n == 8 && in_n == 8)
40413 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40414 else if (out_n == 16 && in_n == 16)
40415 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40419 case BUILT_IN_FLOOR:
40420 /* The round insn does not trap on denormals. */
40421 if (flag_trapping_math || !TARGET_ROUND)
40424 if (out_mode == DFmode && in_mode == DFmode)
40426 if (out_n == 2 && in_n == 2)
40427 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40428 else if (out_n == 4 && in_n == 4)
40429 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40433 case BUILT_IN_FLOORF:
40434 /* The round insn does not trap on denormals. */
40435 if (flag_trapping_math || !TARGET_ROUND)
40438 if (out_mode == SFmode && in_mode == SFmode)
40440 if (out_n == 4 && in_n == 4)
40441 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40442 else if (out_n == 8 && in_n == 8)
40443 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40447 case BUILT_IN_CEIL:
40448 /* The round insn does not trap on denormals. */
40449 if (flag_trapping_math || !TARGET_ROUND)
40452 if (out_mode == DFmode && in_mode == DFmode)
40454 if (out_n == 2 && in_n == 2)
40455 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40456 else if (out_n == 4 && in_n == 4)
40457 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40461 case BUILT_IN_CEILF:
40462 /* The round insn does not trap on denormals. */
40463 if (flag_trapping_math || !TARGET_ROUND)
40466 if (out_mode == SFmode && in_mode == SFmode)
40468 if (out_n == 4 && in_n == 4)
40469 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40470 else if (out_n == 8 && in_n == 8)
40471 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40475 case BUILT_IN_TRUNC:
40476 /* The round insn does not trap on denormals. */
40477 if (flag_trapping_math || !TARGET_ROUND)
40480 if (out_mode == DFmode && in_mode == DFmode)
40482 if (out_n == 2 && in_n == 2)
40483 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40484 else if (out_n == 4 && in_n == 4)
40485 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40489 case BUILT_IN_TRUNCF:
40490 /* The round insn does not trap on denormals. */
40491 if (flag_trapping_math || !TARGET_ROUND)
40494 if (out_mode == SFmode && in_mode == SFmode)
40496 if (out_n == 4 && in_n == 4)
40497 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40498 else if (out_n == 8 && in_n == 8)
40499 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40503 case BUILT_IN_RINT:
40504 /* The round insn does not trap on denormals. */
40505 if (flag_trapping_math || !TARGET_ROUND)
40508 if (out_mode == DFmode && in_mode == DFmode)
40510 if (out_n == 2 && in_n == 2)
40511 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40512 else if (out_n == 4 && in_n == 4)
40513 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40517 case BUILT_IN_RINTF:
40518 /* The round insn does not trap on denormals. */
40519 if (flag_trapping_math || !TARGET_ROUND)
40522 if (out_mode == SFmode && in_mode == SFmode)
40524 if (out_n == 4 && in_n == 4)
40525 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40526 else if (out_n == 8 && in_n == 8)
40527 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40531 case BUILT_IN_ROUND:
40532 /* The round insn does not trap on denormals. */
40533 if (flag_trapping_math || !TARGET_ROUND)
40536 if (out_mode == DFmode && in_mode == DFmode)
40538 if (out_n == 2 && in_n == 2)
40539 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40540 else if (out_n == 4 && in_n == 4)
40541 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40545 case BUILT_IN_ROUNDF:
40546 /* The round insn does not trap on denormals. */
40547 if (flag_trapping_math || !TARGET_ROUND)
40550 if (out_mode == SFmode && in_mode == SFmode)
40552 if (out_n == 4 && in_n == 4)
40553 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40554 else if (out_n == 8 && in_n == 8)
40555 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
/* Fused multiply-add -> vfmaddpd.  NOTE(review): the case label
   (presumably BUILT_IN_FMA) falls in lines elided from this excerpt.  */
40560 if (out_mode == DFmode && in_mode == DFmode)
40562 if (out_n == 2 && in_n == 2)
40563 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40564 if (out_n == 4 && in_n == 4)
40565 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40569 case BUILT_IN_FMAF:
40570 if (out_mode == SFmode && in_mode == SFmode)
40572 if (out_n == 4 && in_n == 4)
40573 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40574 if (out_n == 8 && in_n == 8)
40575 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
40583 /* Dispatch to a handler for a vectorization library. */
40584 if (ix86_veclib_handler)
40585 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
40591 /* Handler for an SVML-style interface to
40592 a library with vectorized intrinsics. */
40595 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
40598 tree fntype, new_fndecl, args;
40601 machine_mode el_mode, in_mode;
40604 /* The SVML is suitable for unsafe math only. */
40605 if (!flag_unsafe_math_optimizations)
40608 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40609 n = TYPE_VECTOR_SUBPARTS (type_out);
40610 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40611 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Input and output vectors must have the same element mode and
   lane count.  */
40612 if (el_mode != in_mode
/* Double-precision scalar builtins vectorize to 2-lane DF vectors.  */
40620 case BUILT_IN_LOG10:
40622 case BUILT_IN_TANH:
40624 case BUILT_IN_ATAN:
40625 case BUILT_IN_ATAN2:
40626 case BUILT_IN_ATANH:
40627 case BUILT_IN_CBRT:
40628 case BUILT_IN_SINH:
40630 case BUILT_IN_ASINH:
40631 case BUILT_IN_ASIN:
40632 case BUILT_IN_COSH:
40634 case BUILT_IN_ACOSH:
40635 case BUILT_IN_ACOS:
40636 if (el_mode != DFmode || n != 2)
/* Single-precision scalar builtins vectorize to 4-lane SF vectors.  */
40640 case BUILT_IN_EXPF:
40641 case BUILT_IN_LOGF:
40642 case BUILT_IN_LOG10F:
40643 case BUILT_IN_POWF:
40644 case BUILT_IN_TANHF:
40645 case BUILT_IN_TANF:
40646 case BUILT_IN_ATANF:
40647 case BUILT_IN_ATAN2F:
40648 case BUILT_IN_ATANHF:
40649 case BUILT_IN_CBRTF:
40650 case BUILT_IN_SINHF:
40651 case BUILT_IN_SINF:
40652 case BUILT_IN_ASINHF:
40653 case BUILT_IN_ASINF:
40654 case BUILT_IN_COSHF:
40655 case BUILT_IN_COSF:
40656 case BUILT_IN_ACOSHF:
40657 case BUILT_IN_ACOSF:
40658 if (el_mode != SFmode || n != 4)
/* Build the SVML routine name: "vmls<name>4" for float and
   "vmld<name>2" for double, with "Ln" special-cased for log.  */
40666 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40668 if (fn == BUILT_IN_LOGF)
40669 strcpy (name, "vmlsLn4");
40670 else if (fn == BUILT_IN_LOG)
40671 strcpy (name, "vmldLn2");
/* bname+10 skips the "__builtin_" prefix of the scalar name.  */
40674 sprintf (name, "vmls%s", bname+10);
/* Replace the trailing 'f' of the float builtin name with the
   lane count '4'.  */
40675 name[strlen (name)-1] = '4';
40678 sprintf (name, "vmld%s2", bname+10);
40680 /* Convert to uppercase. */
/* Count the scalar builtin's arguments to pick a unary or binary
   vector function type.  */
40684 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40686 args = TREE_CHAIN (args))
40690 fntype = build_function_type_list (type_out, type_in, NULL);
40692 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40694 /* Build a function declaration for the vectorized function. */
40695 new_fndecl = build_decl (BUILTINS_LOCATION,
40696 FUNCTION_DECL, get_identifier (name), fntype);
40697 TREE_PUBLIC (new_fndecl) = 1;
40698 DECL_EXTERNAL (new_fndecl) = 1;
/* The library routines are pure math: no virtual operands, readonly.  */
40699 DECL_IS_NOVOPS (new_fndecl) = 1;
40700 TREE_READONLY (new_fndecl) = 1;
40705 /* Handler for an ACML-style interface to
40706 a library with vectorized intrinsics. */
40709 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
/* Name template; the ".." placeholders are filled in with the
   precision letter and lane count by code elided from this excerpt
   (NOTE(review): verify against the full source).  */
40711 char name[20] = "__vr.._";
40712 tree fntype, new_fndecl, args;
40715 machine_mode el_mode, in_mode;
40718 /* The ACML is 64bits only and suitable for unsafe math only as
40719 it does not correctly support parts of IEEE with the required
40720 precision such as denormals. */
40722 || !flag_unsafe_math_optimizations)
40725 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40726 n = TYPE_VECTOR_SUBPARTS (type_out);
40727 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40728 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Input and output vectors must agree in element mode and lanes.  */
40729 if (el_mode != in_mode
40739 case BUILT_IN_LOG2:
40740 case BUILT_IN_LOG10:
40743 if (el_mode != DFmode
40748 case BUILT_IN_SINF:
40749 case BUILT_IN_COSF:
40750 case BUILT_IN_EXPF:
40751 case BUILT_IN_POWF:
40752 case BUILT_IN_LOGF:
40753 case BUILT_IN_LOG2F:
40754 case BUILT_IN_LOG10F:
40757 if (el_mode != SFmode
/* Append the scalar name minus its "__builtin_" prefix (10 chars)
   after the "__vr.._" template.  */
40766 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40767 sprintf (name + 7, "%s", bname+10);
/* Count the scalar builtin's arguments to pick a unary or binary
   vector function type.  */
40770 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40772 args = TREE_CHAIN (args))
40776 fntype = build_function_type_list (type_out, type_in, NULL);
40778 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40780 /* Build a function declaration for the vectorized function. */
40781 new_fndecl = build_decl (BUILTINS_LOCATION,
40782 FUNCTION_DECL, get_identifier (name), fntype);
40783 TREE_PUBLIC (new_fndecl) = 1;
40784 DECL_EXTERNAL (new_fndecl) = 1;
/* The library routines are pure math: no virtual operands, readonly.  */
40785 DECL_IS_NOVOPS (new_fndecl) = 1;
40786 TREE_READONLY (new_fndecl) = 1;
40791 /* Returns a decl of a function that implements gather load with
40792 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
40793 Return NULL_TREE if it is not available. */
40796 ix86_vectorize_builtin_gather (const_tree mem_vectype,
40797 const_tree index_type, int scale)
40800 enum ix86_builtins code;
/* v*gather* only supports SImode or DImode indices of integer or
   pointer type.  */
40805 if ((TREE_CODE (index_type) != INTEGER_TYPE
40806 && !POINTER_TYPE_P (index_type))
40807 || (TYPE_MODE (index_type) != SImode
40808 && TYPE_MODE (index_type) != DImode))
40811 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
40814 /* v*gather* insn sign extends index to pointer mode. */
40815 if (TYPE_PRECISION (index_type) < POINTER_SIZE
40816 && TYPE_UNSIGNED (index_type))
/* SCALE must be a power of two (the insn encodes 1/2/4/8).  */
40821 || (scale & (scale - 1)) != 0)
/* SI selects the 32-bit-index ("SIV") variants; otherwise the
   64-bit-index ("DIV") variants are used.  AVX512VL variants are
   preferred when available.  */
40824 si = TYPE_MODE (index_type) == SImode;
40825 switch (TYPE_MODE (mem_vectype))
40828 if (TARGET_AVX512VL)
40829 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
40831 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
40834 if (TARGET_AVX512VL)
40835 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
40837 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
40840 if (TARGET_AVX512VL)
40841 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
40843 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
40846 if (TARGET_AVX512VL)
40847 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
40849 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
40852 if (TARGET_AVX512VL)
40853 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
40855 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
40858 if (TARGET_AVX512VL)
40859 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
40861 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
40864 if (TARGET_AVX512VL)
40865 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
40867 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
40870 if (TARGET_AVX512VL)
40871 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
40873 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
/* 512-bit modes require full AVX512F.  */
40876 if (TARGET_AVX512F)
40877 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
40882 if (TARGET_AVX512F)
40883 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
40888 if (TARGET_AVX512F)
40889 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
40894 if (TARGET_AVX512F)
40895 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
40903 return ix86_get_builtin (code);
40906 /* Returns a code for a target-specific builtin that implements
40907 reciprocal of the function, or NULL_TREE if not available. */
40910 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
/* The rsqrt approximation is only valid with SSE math, finite
   non-trapping unsafe math, and when optimizing for speed.  */
40912 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
40913 && flag_finite_math_only && !flag_trapping_math
40914 && flag_unsafe_math_optimizations))
40918 /* Machine dependent builtins. */
40921 /* Vectorized version of sqrt to rsqrt conversion. */
40922 case IX86_BUILTIN_SQRTPS_NR:
40923 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
40925 case IX86_BUILTIN_SQRTPS_NR256:
40926 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
40932 /* Normal builtins. */
40935 /* Sqrt to rsqrt conversion. */
40936 case BUILT_IN_SQRTF:
40937 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
40944 /* Helper for avx_vpermilps256_operand et al. This is also used by
40945 the expansion functions to turn the parallel back into a mask.
40946 The return value is 0 for no match and the imm8+1 for a match. */
40949 avx_vpermilp_parallel (rtx par, machine_mode mode)
40951 unsigned i, nelt = GET_MODE_NUNITS (mode);
40953 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
/* The parallel must have exactly one element per vector lane.  */
40955 if (XVECLEN (par, 0) != (int) nelt)
40958 /* Validate that all of the elements are constants, and not totally
40959 out of range. Copy the data into an integral array to make the
40960 subsequent checks easier. */
40961 for (i = 0; i < nelt; ++i)
40963 rtx er = XVECEXP (par, 0, i);
40964 unsigned HOST_WIDE_INT ei;
40966 if (!CONST_INT_P (er))
40977 /* In the 512-bit DFmode case, we can only move elements within
40978 a 128-bit lane. First fill the second part of the mask,
/* Elements 4-5 must stay in their 128-bit lane (indices 4..5);
   the lane-relative bit goes to mask bit I.  */
40980 for (i = 4; i < 6; ++i)
40982 if (ipar[i] < 4 || ipar[i] >= 6)
40984 mask |= (ipar[i] - 4) << i;
40986 for (i = 6; i < 8; ++i)
40990 mask |= (ipar[i] - 6) << i;
40995 /* In the 256-bit DFmode case, we can only move elements within
40997 for (i = 0; i < 2; ++i)
41001 mask |= ipar[i] << i;
41003 for (i = 2; i < 4; ++i)
41007 mask |= (ipar[i] - 2) << i;
41012 /* In 512 bit SFmode case, permutation in the upper 256 bits
41013 must mirror the permutation in the lower 256-bits. */
41014 for (i = 0; i < 8; ++i)
41015 if (ipar[i] + 8 != ipar[i + 8])
41020 /* In 256 bit SFmode case, we have full freedom of
41021 movement within the low 128-bit lane, but the high 128-bit
41022 lane must mirror the exact same pattern. */
41023 for (i = 0; i < 4; ++i)
41024 if (ipar[i] + 4 != ipar[i + 4])
41031 /* In the 128-bit case, we've full freedom in the placement of
41032 the elements from the source operand. */
/* Each index occupies nelt/2 bits of the imm8: 2 bits per lane for
   V4SF, 1 bit per lane for V2DF.  */
41033 for (i = 0; i < nelt; ++i)
41034 mask |= ipar[i] << (i * (nelt / 2));
41038 gcc_unreachable ();
41041 /* Make sure success has a non-zero value by adding one. */
41045 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
41046 the expansion functions to turn the parallel back into a mask.
41047 The return value is 0 for no match and the imm8+1 for a match. */
41050 avx_vperm2f128_parallel (rtx par, machine_mode mode)
41052 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
41054 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
/* The parallel must have exactly one element per vector lane.  */
41056 if (XVECLEN (par, 0) != (int) nelt)
41059 /* Validate that all of the elements are constants, and not totally
41060 out of range. Copy the data into an integral array to make the
41061 subsequent checks easier. */
41062 for (i = 0; i < nelt; ++i)
41064 rtx er = XVECEXP (par, 0, i);
41065 unsigned HOST_WIDE_INT ei;
41067 if (!CONST_INT_P (er))
/* Indices >= 2*nelt would reach past both source operands.  */
41070 if (ei >= 2 * nelt)
41075 /* Validate that the halves of the permute are halves. */
/* Each half of the result must be a run of consecutive indices,
   i.e. an entire 128-bit lane of one of the two sources.  */
41076 for (i = 0; i < nelt2 - 1; ++i)
41077 if (ipar[i] + 1 != ipar[i + 1])
41079 for (i = nelt2; i < nelt - 1; ++i)
41080 if (ipar[i] + 1 != ipar[i + 1])
41083 /* Reconstruct the mask. */
/* One imm8 nibble per result half selects its source lane.  */
41084 for (i = 0; i < 2; ++i)
41086 unsigned e = ipar[i * nelt2];
41090 mask |= e << (i * 4);
41093 /* Make sure success has a non-zero value by adding one. */
41097 /* Return a register priority for hard reg REGNO. */
41099 ix86_register_priority (int hard_regno)
41101 /* ebp and r13 as the base always wants a displacement, r12 as the
41102 base always wants an index. So discourage their usage in an
41104 if (hard_regno == R12_REG || hard_regno == R13_REG)
41106 if (hard_regno == BP_REG)
41108 /* New x86-64 int registers result in bigger code size. Discourage
41110 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
41112 /* New x86-64 SSE registers result in bigger code size. Discourage
41114 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
41116 /* Usage of AX register results in smaller code. Prefer it. */
/* Hard register 0 is AX.  */
41117 if (hard_regno == 0)
41122 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
41124 Put float CONST_DOUBLE in the constant pool instead of fp regs.
41125 QImode must go into class Q_REGS.
41126 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
41127 movdf to do mem-to-mem moves through integer regs. */
41130 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
41132 machine_mode mode = GET_MODE (x);
41134 /* We're only allowed to return a subclass of CLASS. Many of the
41135 following checks fail for NO_REGS, so eliminate that early. */
41136 if (regclass == NO_REGS)
41139 /* All classes can load zeros. */
41140 if (x == CONST0_RTX (mode))
41143 /* Force constants into memory if we are loading a (nonzero) constant into
41144 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
41145 instructions to load from a constant. */
41147 && (MAYBE_MMX_CLASS_P (regclass)
41148 || MAYBE_SSE_CLASS_P (regclass)
41149 || MAYBE_MASK_CLASS_P (regclass)))
41152 /* Prefer SSE regs only, if we can use them for math. */
41153 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
41154 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
41156 /* Floating-point constants need more complex checks. */
41157 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
41159 /* General regs can load everything. */
41160 if (reg_class_subset_p (regclass, GENERAL_REGS))
41163 /* Floats can load 0 and 1 plus some others. Note that we eliminated
41164 zero above. We only want to wind up preferring 80387 registers if
41165 we plan on doing computation with them. */
/* standard_80387_constant_p > 0 means the constant has a dedicated
   load instruction (fld1, fldpi, ...).  */
41167 && standard_80387_constant_p (x) > 0)
41169 /* Limit class to non-sse. */
41170 if (regclass == FLOAT_SSE_REGS)
41172 if (regclass == FP_TOP_SSE_REGS)
41174 if (regclass == FP_SECOND_SSE_REGS)
41175 return FP_SECOND_REG;
41176 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
41183 /* Generally when we see PLUS here, it's the function invariant
41184 (plus soft-fp const_int). Which can only be computed into general
41186 if (GET_CODE (x) == PLUS)
41187 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
41189 /* QImode constants are easy to load, but non-constant QImode data
41190 must go into Q_REGS. */
41191 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
41193 if (reg_class_subset_p (regclass, Q_REGS))
41195 if (reg_class_subset_p (Q_REGS, regclass))
41203 /* Discourage putting floating-point values in SSE registers unless
41204 SSE math is being used, and likewise for the 387 registers. */
41206 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
41208 machine_mode mode = GET_MODE (x);
41210 /* Restrict the output reload class to the register bank that we are doing
41211 math on. If we would like not to return a subset of CLASS, reject this
41212 alternative: if reload cannot do this, it will still use its choice. */
/* NOTE(review): MODE was already initialized to GET_MODE (x) above;
   this reassignment is redundant.  */
41213 mode = GET_MODE (x);
41214 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
41215 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
41217 if (X87_FLOAT_MODE_P (mode))
/* Strip the SSE component from mixed x87/SSE classes.  */
41219 if (regclass == FP_TOP_SSE_REGS)
41221 else if (regclass == FP_SECOND_SSE_REGS)
41222 return FP_SECOND_REG;
41224 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* Implement TARGET_SECONDARY_RELOAD.  Returns an intermediate register
   class (or NO_REGS) and may set SRI->icode / SRI->extra_cost for reloads
   that need special handling:
     - double-word spills from general regs to non-offsettable memory,
     - QImode spills needing an intermediate register (mask classes, and
       non-Q integer classes on 32-bit targets),
     - scalar PLUS expressions headed for SSE classes (vectorized pointer
       arithmetic corner case, documented at length below).
   NOTE(review): original line numbering is non-contiguous -- several
   statements, braces and at least one early-return path are missing from
   this extraction.  */
41231 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41232 machine_mode mode, secondary_reload_info *sri)
41234 /* Double-word spills from general registers to non-offsettable memory
41235 references (zero-extended addresses) require special handling. */
41238 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41239 && INTEGER_CLASS_P (rclass)
41240 && !offsettable_memref_p (x))
/* Pick the load or store variant of the special reload_noff pattern
   depending on reload direction.  */
41243 ? CODE_FOR_reload_noff_load
41244 : CODE_FOR_reload_noff_store);
41245 /* Add the cost of moving address to a temporary. */
41246 sri->extra_cost = 1;
41251 /* QImode spills from non-QI registers require
41252 intermediate register on 32bit targets. */
41254 && (MAYBE_MASK_CLASS_P (rclass)
41255 || (!TARGET_64BIT && !in_p
41256 && INTEGER_CLASS_P (rclass)
41257 && MAYBE_NON_Q_CLASS_P (rclass))))
/* Resolve pseudos/subregs to a hard register number before classifying.  */
41266 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
41267 regno = true_regnum (x);
41269 /* Return Q_REGS if the operand is in memory. */
41274 /* This condition handles corner case where an expression involving
41275 pointers gets vectorized. We're trying to use the address of a
41276 stack slot as a vector initializer.
41278 (set (reg:V2DI 74 [ vect_cst_.2 ])
41279 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41281 Eventually frame gets turned into sp+offset like this:
41283 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41284 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41285 (const_int 392 [0x188]))))
41287 That later gets turned into:
41289 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41290 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41291 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41293 We'll have the following reload recorded:
41295 Reload 0: reload_in (DI) =
41296 (plus:DI (reg/f:DI 7 sp)
41297 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41298 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41299 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41300 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41301 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41302 reload_reg_rtx: (reg:V2DI 22 xmm1)
41304 Which isn't going to work since SSE instructions can't handle scalar
41305 additions. Returning GENERAL_REGS forces the addition into integer
41306 register and reload can handle subsequent reloads without problems. */
41308 if (in_p && GET_CODE (x) == PLUS
41309 && SSE_CLASS_P (rclass)
41310 && SCALAR_INT_MODE_P (mode))
41311 return GENERAL_REGS;
/* NOTE(review): only a fragment of this function survives the extraction --
   the switch statement, most of its case labels, the return statements and
   the closing brace are missing.  The visible cases suggest it returns true
   for classes containing a single hard register (those spill easily), but
   confirm against the full source before relying on that.  */
41316 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
41319 ix86_class_likely_spilled_p (reg_class_t rclass)
41330 case SSE_FIRST_REG:
41332 case FP_SECOND_REG:
/* Decide whether a copy between register classes CLASS1 and CLASS2 in MODE
   must go through memory.  Kept inline so ix86_register_move_cost (called
   very frequently when building cost tables) can use it cheaply; the
   out-of-line hook wrapper is ix86_secondary_memory_needed below.
   NOTE(review): original line numbers are non-contiguous -- some return
   statements and braces are missing from this view.  */
41343 /* If we are copying between general and FP registers, we need a memory
41344 location. The same is true for SSE and MMX registers.
41346 To optimize register_move_cost performance, allow inline variant.
41348 The macro can't work reliably when one of the CLASSES is class containing
41349 registers from multiple units (SSE, MMX, integer). We avoid this by never
41350 combining those units in single alternative in the machine description.
41351 Ensure that this constraint holds to avoid unexpected surprises.
41353 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41354 enforce these sanity checks. */
41357 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41358 machine_mode mode, int strict)
41360 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
/* Sanity check: no class may mix registers from multiple units (see the
   header comment).  Any MAYBE_* predicate disagreeing with its exact
   counterpart indicates such a mixed class.  */
41362 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41363 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41364 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41365 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41366 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41367 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41369 gcc_assert (!strict || lra_in_progress);
41373 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41376 /* Between mask and general, we have moves no larger than word size. */
41377 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41378 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
41381 /* ??? This is a lie. We do have moves between mmx/general, and for
41382 mmx/sse2. But by saying we need secondary memory we discourage the
41383 register allocator from using the mmx registers unless needed. */
41384 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41387 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41389 /* SSE1 doesn't have any direct moves from other classes. */
41393 /* If the target says that inter-unit moves are more expensive
41394 than moving through memory, then don't generate them. */
41395 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41396 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41399 /* Between SSE and general, we have moves no larger than word size. */
41400 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Implement SECONDARY_MEMORY_NEEDED as a thin out-of-line wrapper around
   inline_secondary_memory_needed (the inline variant exists so the
   register-move-cost code can bypass this call -- see comment above).  */
41408 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41409 machine_mode mode, int strict)
41411 return inline_secondary_memory_needed (class1, class2, mode, strict);
/* NOTE(review): lines are missing from this extraction (the non-integer /
   non-complex fallthrough paths and closing brace are not visible).  */
41414 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41416 On the 80386, this is the size of MODE in words,
41417 except in the FP regs, where a single reg is always enough. */
41419 static unsigned char
41420 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41422 if (MAYBE_INTEGER_CLASS_P (rclass))
/* XFmode/XCmode occupy fewer words on 64-bit (80-bit value padded to
   2x64 vs. 3x32 bit words).  */
41424 if (mode == XFmode)
41425 return (TARGET_64BIT ? 2 : 3);
41426 else if (mode == XCmode)
41427 return (TARGET_64BIT ? 4 : 6);
/* Generic case: round the mode size up to whole words.  */
41429 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41433 if (COMPLEX_MODE_P (mode))
/* NOTE(review): non-contiguous line numbers -- the early checks around line
   41450 and the tail of the function are missing from this extraction.  */
41440 /* Return true if the registers in CLASS cannot represent the change from
41441 modes FROM to TO. */
41444 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41445 enum reg_class regclass)
41450 /* x87 registers can't do subreg at all, as all values are reformatted
41451 to extended precision. */
41452 if (MAYBE_FLOAT_CLASS_P (regclass))
41455 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41457 /* Vector registers do not support QI or HImode loads. If we don't
41458 disallow a change to these modes, reload will assume it's ok to
41459 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
41460 the vec_dupv4hi pattern. */
/* Sizes 1 and 2 (QI/HI) are the disallowed cases described above.  */
41461 if (GET_MODE_SIZE (from) < 4)
/* NOTE(review): the index-computation switch bodies for the FP/SSE/MMX
   branches (and several case labels in the integer switch) are missing from
   this extraction -- only the final cost lookups are visible.  */
41468 /* Return the cost of moving data of mode M between a
41469 register and memory. A value of 2 is the default; this cost is
41470 relative to those in `REGISTER_MOVE_COST'.
41472 This function is used extensively by register_move_cost that is used to
41473 build tables at startup. Make it inline in this case.
41474 When IN is 2, return maximum of in and out move cost.
41476 If moving between registers and memory is more expensive than
41477 between two registers, you should define this macro to express the
41480 Model also increased moving costs of QImode registers in non
41484 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
/* x87 (80387) register <-> memory: cost table indexed by FP mode.  */
41488 if (FLOAT_CLASS_P (regclass))
41506 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41507 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE register <-> memory: table indexed by operand size.  */
41509 if (SSE_CLASS_P (regclass))
41512 switch (GET_MODE_SIZE (mode))
41527 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41528 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX register <-> memory: table indexed by operand size.  */
41530 if (MMX_CLASS_P (regclass))
41533 switch (GET_MODE_SIZE (mode))
41545 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41546 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer registers: size 1 (QImode) gets special handling because byte
   loads into non-Q registers can cause partial-register stalls.  */
41548 switch (GET_MODE_SIZE (mode))
41551 if (Q_CLASS_P (regclass) || TARGET_64BIT)
41554 return ix86_cost->int_store[0];
41555 if (TARGET_PARTIAL_REG_DEPENDENCY
41556 && optimize_function_for_speed_p (cfun))
/* Prefer movzbl over a plain byte load to break the partial-register
   dependency when optimizing for speed.  */
41557 cost = ix86_cost->movzbl_load;
41559 cost = ix86_cost->int_load[0];
41561 return MAX (cost, ix86_cost->int_store[0]);
/* Non-Q class: byte store needs an extra shuffle, modeled as +4.  */
41567 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41569 return ix86_cost->movzbl_load;
41571 return ix86_cost->int_store[0] + 4;
41576 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41577 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41579 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
41580 if (mode == TFmode)
41583 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
41585 cost = ix86_cost->int_load[2];
41587 cost = ix86_cost->int_store[2];
/* Scale the per-word cost by the number of words in MODE.  */
41588 return (cost * (((int) GET_MODE_SIZE (mode)
41589 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Implement TARGET_MEMORY_MOVE_COST: hook wrapper that normalizes the
   boolean IN to the 0/1 encoding inline_memory_move_cost expects (it also
   accepts 2 = "max of both directions", not used here).  */
41594 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
41597 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
/* NOTE(review): non-contiguous line numbers -- the declaration of `cost',
   several return statements and the final fallback return are missing from
   this extraction.  */
41601 /* Return the cost of moving data from a register in class CLASS1 to
41602 one in class CLASS2.
41604 It is not required that the cost always equal 2 when FROM is the same as TO;
41605 on some machines it is expensive to move between registers if they are not
41606 general registers. */
41609 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
41610 reg_class_t class2_i)
41612 enum reg_class class1 = (enum reg_class) class1_i;
41613 enum reg_class class2 = (enum reg_class) class2_i;
41615 /* In case we require secondary memory, compute cost of the store followed
41616 by load. In order to avoid bad register allocation choices, we need
41617 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
41619 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* IN == 2 requests the max of load and store cost for each side.  */
41623 cost += inline_memory_move_cost (mode, class1, 2);
41624 cost += inline_memory_move_cost (mode, class2, 2);
41626 /* In case of copying from general_purpose_register we may emit multiple
41627 stores followed by single load causing memory size mismatch stall.
41628 Count this as arbitrarily high cost of 20. */
41629 if (targetm.class_max_nregs (class1, mode)
41630 > targetm.class_max_nregs (class2, mode))
41633 /* In the case of FP/MMX moves, the registers actually overlap, and we
41634 have to switch modes in order to treat them differently. */
41635 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
41636 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
41642 /* Moves between SSE/MMX and integer unit are expensive. */
41643 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
41644 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41646 /* ??? By keeping returned value relatively high, we limit the number
41647 of moves between integer and MMX/SSE registers for all targets.
41648 Additionally, high value prevents problem with x86_modes_tieable_p(),
41649 where integer modes in MMX/SSE registers are not tieable
41650 because of missing QImode and HImode moves to, from or between
41651 MMX/SSE registers. */
41652 return MAX (8, ix86_cost->mmxsse_to_integer);
/* Same-unit moves: use the per-unit tuned cost.  */
41654 if (MAYBE_FLOAT_CLASS_P (class1))
41655 return ix86_cost->fp_move;
41656 if (MAYBE_SSE_CLASS_P (class1))
41657 return ix86_cost->sse_move;
41658 if (MAYBE_MMX_CLASS_P (class1))
41659 return ix86_cost->mmx_move;
/* NOTE(review): non-contiguous original line numbers -- several return
   statements, conditions and the final return are missing from this
   extraction; read the per-register-bank structure, not exact control
   flow, from what is visible.  */
41663 /* Return TRUE if hard register REGNO can hold a value of machine-mode
41667 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
41669 /* Flags and only flags can only hold CCmode values. */
41670 if (CC_REGNO_P (regno))
41671 return GET_MODE_CLASS (mode) == MODE_CC;
41672 if (GET_MODE_CLASS (mode) == MODE_CC
41673 || GET_MODE_CLASS (mode) == MODE_RANDOM
41674 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
41676 if (STACK_REGNO_P (regno))
41677 return VALID_FP_MODE_P (mode);
/* AVX-512 mask registers: wider mask modes need AVX512BW.  */
41678 if (MASK_REGNO_P (regno))
41679 return (VALID_MASK_REG_MODE (mode)
41680 || (TARGET_AVX512BW
41681 && VALID_MASK_AVX512BW_MODE (mode)));
41682 if (BND_REGNO_P (regno))
41683 return VALID_BND_REG_MODE (mode);
41684 if (SSE_REGNO_P (regno))
41686 /* We implement the move patterns for all vector modes into and
41687 out of SSE registers, even when no operation instructions
41690 /* For AVX-512 we allow, regardless of regno:
41692 - any of 512-bit wide vector mode
41693 - any scalar mode. */
41696 || VALID_AVX512F_REG_MODE (mode)
41697 || VALID_AVX512F_SCALAR_MODE (mode)))
41700 /* TODO check for QI/HI scalars. */
41701 /* AVX512VL allows sse regs16+ for 128/256 bit modes. */
41702 if (TARGET_AVX512VL
41705 || VALID_AVX256_REG_MODE (mode)
41706 || VALID_AVX512VL_128_REG_MODE (mode)))
41709 /* xmm16-xmm31 are only available for AVX-512. */
41710 if (EXT_REX_SSE_REGNO_P (regno))
41713 /* OImode and AVX modes are available only when AVX is enabled. */
41714 return ((TARGET_AVX
41715 && VALID_AVX256_REG_OR_OI_MODE (mode))
41716 || VALID_SSE_REG_MODE (mode)
41717 || VALID_SSE2_REG_MODE (mode)
41718 || VALID_MMX_REG_MODE (mode)
41719 || VALID_MMX_REG_MODE_3DNOW (mode));
41721 if (MMX_REGNO_P (regno))
41723 /* We implement the move patterns for 3DNOW modes even in MMX mode,
41724 so if the register is available at all, then we can move data of
41725 the given mode into or out of it. */
41726 return (VALID_MMX_REG_MODE (mode)
41727 || VALID_MMX_REG_MODE_3DNOW (mode));
41730 if (mode == QImode)
41732 /* Take care for QImode values - they can be in non-QI regs,
41733 but then they do cause partial register stalls. */
41734 if (ANY_QI_REGNO_P (regno))
41736 if (!TARGET_PARTIAL_REG_STALL)
41738 /* LRA checks if the hard register is OK for the given mode.
41739 QImode values can live in non-QI regs, so we allow all
41741 if (lra_in_progress)
41743 return !can_create_pseudo_p ();
41745 /* We handle both integer and floats in the general purpose registers. */
41746 else if (VALID_INT_MODE_P (mode))
41748 else if (VALID_FP_MODE_P (mode))
41750 else if (VALID_DFP_MODE_P (mode))
41752 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
41753 on to use that value in smaller contexts, this can easily force a
41754 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
41755 supporting DImode, allow it. */
41756 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
/* NOTE(review): the switch statement and its case labels are missing from
   this extraction; only two of the return expressions are visible.  The
   visible returns suggest narrow modes are tieable unless partial-register
   stalls matter, and the widest mode needs 64-bit -- confirm against the
   full source.  */
41762 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
41763 tieable integer mode. */
41766 ix86_tieable_integer_mode_p (machine_mode mode)
41775 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
41778 return TARGET_64BIT;
/* NOTE(review): the final fallback return and closing brace are missing
   from this extraction.  */
41785 /* Return true if MODE1 is accessible in a register that can hold MODE2
41786 without copying. That is, all register classes that can hold MODE2
41787 can also hold MODE1. */
41790 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
41792 if (mode1 == mode2)
41795 if (ix86_tieable_integer_mode_p (mode1)
41796 && ix86_tieable_integer_mode_p (mode2))
41799 /* MODE2 being XFmode implies fp stack or general regs, which means we
41800 can tie any smaller floating point modes to it. Note that we do not
41801 tie this with TFmode. */
41802 if (mode2 == XFmode)
41803 return mode1 == SFmode || mode1 == DFmode;
41805 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
41806 that we can tie it with SFmode. */
41807 if (mode2 == DFmode)
41808 return mode1 == SFmode;
41810 /* If MODE2 is only appropriate for an SSE register, then tie with
41811 any other mode acceptable to SSE registers. */
/* 32-byte (256-bit AVX) modes tie only with other 32-byte SSE modes.  */
41812 if (GET_MODE_SIZE (mode2) == 32
41813 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41814 return (GET_MODE_SIZE (mode1) == 32
41815 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
/* Likewise for 16-byte (128-bit) SSE modes.  */
41816 if (GET_MODE_SIZE (mode2) == 16
41817 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41818 return (GET_MODE_SIZE (mode1) == 16
41819 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41821 /* If MODE2 is appropriate for an MMX register, then tie
41822 with any other mode acceptable to MMX registers. */
41823 if (GET_MODE_SIZE (mode2) == 8
41824 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
41825 return (GET_MODE_SIZE (mode1) == 8
41826 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
/* NOTE(review): the switch's MODE_INT-style default cases, `break'
   statements and closing brace are missing from this extraction.  */
41831 /* Return the cost of moving between two registers of mode MODE. */
41834 ix86_set_reg_reg_cost (machine_mode mode)
41836 unsigned int units = UNITS_PER_WORD;
41838 switch (GET_MODE_CLASS (mode))
/* CC values move as a unit of CCmode's size.  */
41844 units = GET_MODE_SIZE (CCmode);
/* Scalar FP: if the mode has native register support on this target,
   it moves in one piece; otherwise fall back to word-sized pieces.  */
41848 if ((TARGET_SSE && mode == TFmode)
41849 || (TARGET_80387 && mode == XFmode)
41850 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
41851 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
41852 units = GET_MODE_SIZE (mode);
41855 case MODE_COMPLEX_FLOAT:
41856 if ((TARGET_SSE && mode == TCmode)
41857 || (TARGET_80387 && mode == XCmode)
41858 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
41859 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
41860 units = GET_MODE_SIZE (mode);
41863 case MODE_VECTOR_INT:
41864 case MODE_VECTOR_FLOAT:
/* Vector modes move whole if any enabled ISA level supports them.  */
41865 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41866 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41867 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41868 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41869 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
41870 units = GET_MODE_SIZE (mode);
41873 /* Return the cost of moving between two registers of mode MODE,
41874 assuming that the move will be in pieces of at most UNITS bytes. */
41875 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
/* Implement TARGET_RTX_COSTS for i386: compute a (partial) cost estimate
   for rtx X, writing the result into *TOTAL.
   NOTE(review): this extraction dropped many lines -- most `case' labels
   of the big switch, many `return'/`break' statements and braces are not
   visible (the original line numbers are non-contiguous).  The inline
   comments below annotate only what the visible lines establish; the
   grouping of lines under particular rtx codes is inferred from context
   and should be confirmed against the full source.  */
41878 /* Compute a (partial) cost for rtx X. Return true if the complete
41879 cost has been computed, and false if subexpressions should be
41880 scanned. In either case, *TOTAL contains the cost result. */
41883 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
41887 enum rtx_code code = (enum rtx_code) code_i;
41888 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
41889 machine_mode mode = GET_MODE (x);
/* Size-optimized functions use the size cost table instead of the
   processor-tuned one.  */
41890 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
/* SET of a register from a register or zero: plain reg-reg move cost.  */
41895 if (register_operand (SET_DEST (x), VOIDmode)
41896 && reg_or_0_operand (SET_SRC (x), VOIDmode))
41898 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
/* Constants: cost depends on whether they fit in an immediate.  */
41907 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
41909 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
41911 else if (flag_pic && SYMBOLIC_CONST (x)
41913 && (GET_CODE (x) == LABEL_REF
41914 || (GET_CODE (x) == SYMBOL_REF
41915 && SYMBOL_REF_LOCAL_P (x)))))
41922 if (mode == VOIDmode)
/* FP constants loadable by fld1/fldz etc. are cheap.  */
41927 switch (standard_80387_constant_p (x))
41932 default: /* Other constants */
41939 if (SSE_FLOAT_MODE_P (mode))
41942 switch (standard_sse_constant_p (x))
41946 case 1: /* 0: xor eliminates false dependency */
41949 default: /* -1: cmp contains false dependency */
41954 /* Fall back to (MEM (SYMBOL_REF)), since that's where
41955 it'll probably end up. Add a penalty for size. */
41956 *total = (COSTS_N_INSNS (1)
41957 + (flag_pic != 0 && !TARGET_64BIT)
41958 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
41962 /* The zero extensions is often completely free on x86_64, so make
41963 it as cheap as possible. */
41964 if (TARGET_64BIT && mode == DImode
41965 && GET_MODE (XEXP (x, 0)) == SImode)
41967 else if (TARGET_ZERO_EXTEND_WITH_AND)
41968 *total = cost->add;
41970 *total = cost->movzx;
41974 *total = cost->movsx;
/* Shifts: small left shifts by a constant may be done with lea/add.  */
41978 if (SCALAR_INT_MODE_P (mode)
41979 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
41980 && CONST_INT_P (XEXP (x, 1)))
41982 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
41985 *total = cost->add;
41988 if ((value == 2 || value == 3)
41989 && cost->lea <= cost->shift_const)
41991 *total = cost->lea;
42001 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42003 /* ??? Should be SSE vector operation cost. */
42004 /* At least for published AMD latencies, this really is the same
42005 as the latency for a simple fpu operation like fabs. */
42006 /* V*QImode is emulated with 1-11 insns. */
42007 if (mode == V16QImode || mode == V32QImode)
42010 if (TARGET_XOP && mode == V16QImode)
42012 /* For XOP we use vpshab, which requires a broadcast of the
42013 value to the variable shift insn. For constants this
42014 means a V16Q const in mem; even when we can perform the
42015 shift with one insn set the cost to prefer paddb. */
42016 if (CONSTANT_P (XEXP (x, 1)))
42018 *total = (cost->fabs
42019 + rtx_cost (XEXP (x, 0), code, 0, speed)
42020 + (speed ? 2 : COSTS_N_BYTES (16)));
42025 else if (TARGET_SSSE3)
42027 *total = cost->fabs * count;
42030 *total = cost->fabs;
/* Double-word scalar shifts (wider than a machine word).  */
42032 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42034 if (CONST_INT_P (XEXP (x, 1)))
42036 if (INTVAL (XEXP (x, 1)) > 32)
42037 *total = cost->shift_const + COSTS_N_INSNS (2);
42039 *total = cost->shift_const * 2;
42043 if (GET_CODE (XEXP (x, 1)) == AND)
42044 *total = cost->shift_var * 2;
42046 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
42051 if (CONST_INT_P (XEXP (x, 1)))
42052 *total = cost->shift_const;
42053 else if (GET_CODE (XEXP (x, 1)) == SUBREG
42054 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
42056 /* Return the cost after shift-and truncation. */
42057 *total = cost->shift_var;
42061 *total = cost->shift_var;
/* FMA: requires one of the FMA ISA extensions.  */
42069 gcc_assert (FLOAT_MODE_P (mode));
42070 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
42072 /* ??? SSE scalar/vector cost should be used here. */
42073 /* ??? Bald assumption that fma has the same cost as fmul. */
42074 *total = cost->fmul;
42075 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
42077 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
42079 if (GET_CODE (sub) == NEG)
42080 sub = XEXP (sub, 0);
42081 *total += rtx_cost (sub, FMA, 0, speed);
42084 if (GET_CODE (sub) == NEG)
42085 sub = XEXP (sub, 0);
42086 *total += rtx_cost (sub, FMA, 2, speed);
/* MULT: FP multiply first, then emulated vector-integer cases, then the
   scalar integer path with widening-multiply detection.  */
42091 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42093 /* ??? SSE scalar cost should be used here. */
42094 *total = cost->fmul;
42097 else if (X87_FLOAT_MODE_P (mode))
42099 *total = cost->fmul;
42102 else if (FLOAT_MODE_P (mode))
42104 /* ??? SSE vector cost should be used here. */
42105 *total = cost->fmul;
42108 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42110 /* V*QImode is emulated with 7-13 insns. */
42111 if (mode == V16QImode || mode == V32QImode)
42114 if (TARGET_XOP && mode == V16QImode)
42116 else if (TARGET_SSSE3)
42118 *total = cost->fmul * 2 + cost->fabs * extra;
42120 /* V*DImode is emulated with 5-8 insns. */
42121 else if (mode == V2DImode || mode == V4DImode)
42123 if (TARGET_XOP && mode == V2DImode)
42124 *total = cost->fmul * 2 + cost->fabs * 3;
42126 *total = cost->fmul * 3 + cost->fabs * 5;
42128 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
42129 insns, including two PMULUDQ. */
42130 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
42131 *total = cost->fmul * 2 + cost->fabs * 5;
42133 *total = cost->fmul;
42138 rtx op0 = XEXP (x, 0);
42139 rtx op1 = XEXP (x, 1);
/* Multiplying by a constant: cost scales with the popcount of the
   constant (number of set bits = shift/add steps).  */
42141 if (CONST_INT_P (XEXP (x, 1)))
42143 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42144 for (nbits = 0; value != 0; value &= value - 1)
42148 /* This is arbitrary. */
42151 /* Compute costs correctly for widening multiplication. */
42152 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
42153 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
42154 == GET_MODE_SIZE (mode))
42156 int is_mulwiden = 0;
42157 machine_mode inner_mode = GET_MODE (op0);
42159 if (GET_CODE (op0) == GET_CODE (op1))
42160 is_mulwiden = 1, op1 = XEXP (op1, 0);
42161 else if (CONST_INT_P (op1))
42163 if (GET_CODE (op0) == SIGN_EXTEND)
42164 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
42167 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
42171 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
42174 *total = (cost->mult_init[MODE_INDEX (mode)]
42175 + nbits * cost->mult_bit
42176 + rtx_cost (op0, outer_code, opno, speed)
42177 + rtx_cost (op1, outer_code, opno, speed));
/* DIV/MOD family.  */
42186 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42187 /* ??? SSE cost should be used here. */
42188 *total = cost->fdiv;
42189 else if (X87_FLOAT_MODE_P (mode))
42190 *total = cost->fdiv;
42191 else if (FLOAT_MODE_P (mode))
42192 /* ??? SSE vector cost should be used here. */
42193 *total = cost->fdiv;
42195 *total = cost->divide[MODE_INDEX (mode)];
/* PLUS: detect address-like forms (reg + reg*scale + const) that map to
   a single lea instruction.  */
42199 if (GET_MODE_CLASS (mode) == MODE_INT
42200 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
42202 if (GET_CODE (XEXP (x, 0)) == PLUS
42203 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
42204 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
42205 && CONSTANT_P (XEXP (x, 1)))
42207 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
42208 if (val == 2 || val == 4 || val == 8)
42210 *total = cost->lea;
42211 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42212 outer_code, opno, speed);
42213 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
42214 outer_code, opno, speed);
42215 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42219 else if (GET_CODE (XEXP (x, 0)) == MULT
42220 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
42222 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
42223 if (val == 2 || val == 4 || val == 8)
42225 *total = cost->lea;
42226 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42227 outer_code, opno, speed);
42228 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42232 else if (GET_CODE (XEXP (x, 0)) == PLUS)
42234 *total = cost->lea;
42235 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42236 outer_code, opno, speed);
42237 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42238 outer_code, opno, speed);
42239 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
/* FP add/sub.  */
42246 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42248 /* ??? SSE cost should be used here. */
42249 *total = cost->fadd;
42252 else if (X87_FLOAT_MODE_P (mode))
42254 *total = cost->fadd;
42257 else if (FLOAT_MODE_P (mode))
42259 /* ??? SSE vector cost should be used here. */
42260 *total = cost->fadd;
/* Double-word logic ops: two insns; DImode subexpressions that need
   splitting are doubled via the shift below.  */
42268 if (GET_MODE_CLASS (mode) == MODE_INT
42269 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42271 *total = (cost->add * 2
42272 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
42273 << (GET_MODE (XEXP (x, 0)) != DImode))
42274 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
42275 << (GET_MODE (XEXP (x, 1)) != DImode)));
/* NEG.  */
42281 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42283 /* ??? SSE cost should be used here. */
42284 *total = cost->fchs;
42287 else if (X87_FLOAT_MODE_P (mode))
42289 *total = cost->fchs;
42292 else if (FLOAT_MODE_P (mode))
42294 /* ??? SSE vector cost should be used here. */
42295 *total = cost->fchs;
42301 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42303 /* ??? Should be SSE vector operation cost. */
42304 /* At least for published AMD latencies, this really is the same
42305 as the latency for a simple fpu operation like fabs. */
42306 *total = cost->fabs;
42308 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42309 *total = cost->add * 2;
42311 *total = cost->add;
/* COMPARE of a single extracted bit against zero maps to test[bwl].  */
42315 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42316 && XEXP (XEXP (x, 0), 1) == const1_rtx
42317 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42318 && XEXP (x, 1) == const0_rtx)
42320 /* This kind of construct is implemented using test[bwl].
42321 Treat it as if we had an AND. */
42322 *total = (cost->add
42323 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
42324 + rtx_cost (const1_rtx, outer_code, opno, speed));
42330 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
/* ABS.  */
42335 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42336 /* ??? SSE cost should be used here. */
42337 *total = cost->fabs;
42338 else if (X87_FLOAT_MODE_P (mode))
42339 *total = cost->fabs;
42340 else if (FLOAT_MODE_P (mode))
42341 /* ??? SSE vector cost should be used here. */
42342 *total = cost->fabs;
/* SQRT.  */
42346 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42347 /* ??? SSE cost should be used here. */
42348 *total = cost->fsqrt;
42349 else if (X87_FLOAT_MODE_P (mode))
42350 *total = cost->fsqrt;
42351 else if (FLOAT_MODE_P (mode))
42352 /* ??? SSE vector cost should be used here. */
42353 *total = cost->fsqrt;
/* UNSPEC: thread-pointer reads are special-cased.  */
42357 if (XINT (x, 1) == UNSPEC_TP)
42363 case VEC_DUPLICATE:
42364 /* ??? Assume all of these vector manipulation patterns are
42365 recognizable. In which case they all pretty much have the
42367 *total = cost->fabs;
/* Masked (AVX-512) operation: cost of the unmasked inner operation.  */
42370 mask = XEXP (x, 2);
42371 /* This is masked instruction, assume the same cost,
42372 as nonmasked variant. */
42373 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42374 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
42376 *total = cost->fabs;
/* Counter used to generate unique local labels (L<N>$lz, LPC$<N>) for
   Darwin/Mach-O symbol stubs.  */
42386 static int current_machopic_label_num;
42388 /* Given a symbol name and its associated stub, write out the
42389 definition of the stub. */
/* Emits the 32-bit Darwin lazy-binding stub for SYMB into FILE: the stub
   body (AT&T-style hlt-padded, PIC via get_pc_thunk, or non-PIC jmp), the
   stub-binding-helper trampoline (except for AT&T-style stubs, which are
   not lazily bound), and the lazy symbol pointer.  64-bit never reaches
   here (asserted below).
   NOTE(review): non-contiguous line numbers -- some directives and braces
   are missing from this extraction.  */
42392 machopic_output_stub (FILE *file, const char *symb, const char *stub)
42394 unsigned int length;
42395 char *binder_name, *symbol_name, lazy_ptr_name[32];
42396 int label = ++current_machopic_label_num;
42398 /* For 64-bit we shouldn't get here. */
42399 gcc_assert (!TARGET_64BIT);
42401 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
42402 symb = targetm.strip_name_encoding (symb);
42404 length = strlen (stub);
42405 binder_name = XALLOCAVEC (char, length + 32);
42406 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42408 length = strlen (symb);
42409 symbol_name = XALLOCAVEC (char, length + 32);
42410 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42412 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Pick the stub section matching the stub flavor.  */
42414 if (MACHOPIC_ATT_STUB)
42415 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42416 else if (MACHOPIC_PURE)
42417 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42419 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42421 fprintf (file, "%s:\n", stub);
42422 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42424 if (MACHOPIC_ATT_STUB)
/* AT&T-style stub body: hlt padding, patched by dyld at bind time.  */
42426 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42428 else if (MACHOPIC_PURE)
42431 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42432 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42433 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
42434 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42435 label, lazy_ptr_name, label);
42436 fprintf (file, "\tjmp\t*%%ecx\n");
42439 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42441 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42442 it needs no stub-binding-helper. */
42443 if (MACHOPIC_ATT_STUB)
42446 fprintf (file, "%s:\n", binder_name);
/* Binder trampoline: push the lazy pointer address then jump to the
   dyld binding helper.  PIC variant computes the address via %ecx.  */
42450 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42451 fprintf (file, "\tpushl\t%%ecx\n");
42454 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42456 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42458 /* N.B. Keep the correspondence of these
42459 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42460 old-pic/new-pic/non-pic stubs; altering this will break
42461 compatibility with existing dylibs. */
42464 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42465 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42468 /* 16-byte -mdynamic-no-pic stub. */
42469 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
42471 fprintf (file, "%s:\n", lazy_ptr_name);
42472 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42473 fprintf (file, ASM_LONG "%s\n", binder_name);
42475 #endif /* TARGET_MACHO */
/* Fills reg_alloc_order[] with the preferred allocation order: call-used
   general regs first, then call-saved general regs, then (when doing x87
   math) the x87 stack regs, then SSE / REX-SSE / extended-REX-SSE regs,
   mask regs, MPX bound regs, x87 (when SSE math), and finally MMX; unused
   slots are zero-filled.  */
42477 /* Order the registers for register allocator. */
42480 x86_order_regs_for_local_alloc (void)
42485 /* First allocate the local general purpose registers. */
42486 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42487 if (GENERAL_REGNO_P (i) && call_used_regs[i])
42488 reg_alloc_order [pos++] = i;
42490 /* Global general purpose registers. */
42491 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42492 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42493 reg_alloc_order [pos++] = i;
42495 /* x87 registers come first in case we are doing FP math
42497 if (!TARGET_SSE_MATH)
42498 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42499 reg_alloc_order [pos++] = i;
42501 /* SSE registers. */
42502 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42503 reg_alloc_order [pos++] = i;
42504 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42505 reg_alloc_order [pos++] = i;
42507 /* Extended REX SSE registers. */
42508 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42509 reg_alloc_order [pos++] = i;
42511 /* Mask register. */
42512 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42513 reg_alloc_order [pos++] = i;
42515 /* MPX bound registers. */
42516 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42517 reg_alloc_order [pos++] = i;
/* When doing SSE math the x87 regs are deprioritized to the back.  */
42519 /* x87 registers. */
42520 if (TARGET_SSE_MATH)
42521 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42522 reg_alloc_order [pos++] = i;
42524 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42525 reg_alloc_order [pos++] = i;
42527 /* Initialize the rest of array as we do not allocate some registers
42529 while (pos < FIRST_PSEUDO_REGISTER)
42530 reg_alloc_order [pos++] = 0;
42533 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42534 in struct attribute_spec handler. */
/* Validates the attribute: it must be attached to a function type, is
   32-bit only (the 64-bit rejection branch is elided in this excerpt),
   and takes a single integer argument that must be 0 or 1.  On any
   violation a -Wattributes warning is issued and *no_add_attrs is set
   so the attribute is dropped.  NOTE(review): the warning calls below
   are missing their `name' argument lines in this elided listing.  */
42536 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42539 bool *no_add_attrs)
42541 if (TREE_CODE (*node) != FUNCTION_TYPE
42542 && TREE_CODE (*node) != METHOD_TYPE
42543 && TREE_CODE (*node) != FIELD_DECL
42544 && TREE_CODE (*node) != TYPE_DECL)
42546 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42548 *no_add_attrs = true;
42553 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42555 *no_add_attrs = true;
42558 if (is_attribute_p ("callee_pop_aggregate_return", name))
/* The attribute argument selects whether the callee pops the hidden
   aggregate-return pointer; only 0 and 1 are meaningful.  */
42562 cst = TREE_VALUE (args);
42563 if (TREE_CODE (cst) != INTEGER_CST)
42565 warning (OPT_Wattributes,
42566 "%qE attribute requires an integer constant argument",
42568 *no_add_attrs = true;
42570 else if (compare_tree_int (cst, 0) != 0
42571 && compare_tree_int (cst, 1) != 0)
42573 warning (OPT_Wattributes,
42574 "argument to %qE attribute is neither zero, nor one",
42576 *no_add_attrs = true;
42585 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
42586 struct attribute_spec.handler. */
/* Checks that the attribute is on a function type and that ms_abi and
   sysv_abi are not both present on the same type (they select mutually
   exclusive calling conventions); errors if they are combined.  */
42588 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
42589 bool *no_add_attrs)
42591 if (TREE_CODE (*node) != FUNCTION_TYPE
42592 && TREE_CODE (*node) != METHOD_TYPE
42593 && TREE_CODE (*node) != FIELD_DECL
42594 && TREE_CODE (*node) != TYPE_DECL)
42596 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42598 *no_add_attrs = true;
/* Each ABI attribute rejects the presence of its counterpart.  */
42602 /* Can combine regparm with all attributes but fastcall. */
42603 if (is_attribute_p ("ms_abi", name))
42605 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
42607 error ("ms_abi and sysv_abi attributes are not compatible")
42612 else if (is_attribute_p ("sysv_abi", name))
42614 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
42616 error ("ms_abi and sysv_abi attributes are not compatible");
42625 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
42626 struct attribute_spec.handler. */
/* Resolves the target type (for a TYPE_DECL, the declared type),
   requires it to be a record or union, and rejects combining ms_struct
   with gcc_struct since they select conflicting layout rules.  */
42628 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
42629 bool *no_add_attrs)
42632 if (DECL_P (*node))
42634 if (TREE_CODE (*node) == TYPE_DECL)
42635 type = &TREE_TYPE (*node);
42640 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
42642 warning (OPT_Wattributes, "%qE attribute ignored",
42644 *no_add_attrs = true;
/* The two attributes are mutually exclusive in either order.  */
42647 else if ((is_attribute_p ("ms_struct", name)
42648 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
42649 || ((is_attribute_p ("gcc_struct", name)
42650 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
42652 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
42654 *no_add_attrs = true;
/* Attribute handler for attributes valid only on function declarations;
   warns and drops the attribute when applied to anything else.  */
42661 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
42662 bool *no_add_attrs)
42664 if (TREE_CODE (*node) != FUNCTION_DECL)
42666 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42668 *no_add_attrs = true;
/* Return true if RECORD_TYPE should use MS bitfield layout: either the
   target default is MS layout and the type does not opt out via
   gcc_struct, or the type explicitly requests it via ms_struct.  */
42674 ix86_ms_bitfield_layout_p (const_tree record_type)
42676 return ((TARGET_MS_BITFIELD_LAYOUT
42677 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
42678 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
42681 /* Returns an expression indicating where the this parameter is
42682 located on entry to the FUNCTION. */
/* 64-bit: `this' is in the first (or second, if an aggregate return
   pointer occupies the first) integer parameter register of the
   function's ABI.  32-bit: with regparm/fastcall/thiscall it may be in
   CX/DX; otherwise it is on the stack just above the return address.
   NOTE(review): elided listing -- the 64-bit/32-bit branch structure
   and some declarations are not visible here.  */
42685 x86_this_parameter (tree function)
42687 tree type = TREE_TYPE (function);
/* AGGR is 1 when the value is returned in memory, which shifts `this'
   to the next parameter slot.  */
42688 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
42693 const int *parm_regs;
42695 if (ix86_function_type_abi (type) == MS_ABI)
42696 parm_regs = x86_64_ms_abi_int_parameter_registers;
42698 parm_regs = x86_64_int_parameter_registers;
42699 return gen_rtx_REG (Pmode, parm_regs[aggr]);
42702 nregs = ix86_function_regparm (type, function);
42704 if (nregs > 0 && !stdarg_p (type))
42707 unsigned int ccvt = ix86_get_callcvt (type);
42709 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42710 regno = aggr ? DX_REG : CX_REG;
42711 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
/* With an aggregate return under these conventions, `this' spills to
   the stack slot at SP+4.  */
42715 return gen_rtx_MEM (SImode,
42716 plus_constant (Pmode, stack_pointer_rtx, 4));
42725 return gen_rtx_MEM (SImode,
42726 plus_constant (Pmode,
42727 stack_pointer_rtx, 4));
42730 return gen_rtx_REG (SImode, regno);
/* Default 32-bit case: `this' lives on the stack; the offset operand
   is on an elided line.  */
42733 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
42737 /* Determine whether x86_output_mi_thunk can succeed. */
/* 64-bit always works.  32-bit needs one scratch register, so fail
   when all three regparm registers are taken and either a vcall offset
   or a PIC GOT reference would need the scratch.  NOTE(review): the
   early `return true' / `return false' lines are elided here.  */
42740 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
42741 const_tree function)
42743 /* 64-bit can handle anything. */
42747 /* For 32-bit, everything's fine if we have one free register. */
42748 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
42751 /* Need a free register for vcall_offset. */
42755 /* Need a free register for GOT references. */
42756 if (flag_pic && !targetm.binds_local_p (function))
42759 /* Otherwise ok. */
42763 /* Output the assembler code for a thunk function. THUNK_DECL is the
42764 declaration for the thunk function itself, FUNCTION is the decl for
42765 the target function. DELTA is an immediate constant offset to be
42766 added to THIS. If VCALL_OFFSET is nonzero, the word at
42767 *(*this + vcall_offset) should be added to THIS. */
/* Emits RTL for the thunk (adjust `this', then tail-call FUNCTION) and
   runs a minimal final pass to write the assembly to FILE.
   NOTE(review): heavily elided listing; several branch headers,
   declarations and closing braces are not visible here.  */
42770 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
42771 HOST_WIDE_INT vcall_offset, tree function)
42773 rtx this_param = x86_this_parameter (function);
42774 rtx this_reg, tmp, fnaddr;
42775 unsigned int tmp_regno;
/* Pick a scratch register that the calling convention leaves free:
   R10 on 64-bit, otherwise AX/DX/CX depending on fastcall/thiscall.  */
42779 tmp_regno = R10_REG;
42782 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
42783 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42784 tmp_regno = AX_REG;
42785 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42786 tmp_regno = DX_REG;
42788 tmp_regno = CX_REG;
42791 emit_note (NOTE_INSN_PROLOGUE_END);
42793 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
42794 pull it in now and let DELTA benefit. */
42795 if (REG_P (this_param))
42796 this_reg = this_param;
42797 else if (vcall_offset)
42799 /* Put the this parameter into %eax. */
42800 this_reg = gen_rtx_REG (Pmode, AX_REG);
42801 emit_move_insn (this_reg, this_param);
42804 this_reg = NULL_RTX;
42806 /* Adjust the this parameter by a fixed constant. */
42809 rtx delta_rtx = GEN_INT (delta);
42810 rtx delta_dst = this_reg ? this_reg : this_param;
/* DELTA may not fit a sign-extended 32-bit immediate on x86-64; load
   it into the scratch register first.  */
42814 if (!x86_64_general_operand (delta_rtx, Pmode))
42816 tmp = gen_rtx_REG (Pmode, tmp_regno);
42817 emit_move_insn (tmp, delta_rtx);
42822 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
42825 /* Adjust the this parameter by a value stored in the vtable. */
42828 rtx vcall_addr, vcall_mem, this_mem;
42830 tmp = gen_rtx_REG (Pmode, tmp_regno);
/* Load the vtable pointer (*this); zero-extend when pointers are
   narrower than Pmode (x32).  */
42832 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
42833 if (Pmode != ptr_mode)
42834 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
42835 emit_move_insn (tmp, this_mem);
42837 /* Adjust the this parameter. */
42838 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
/* Large vcall offsets may not form a legitimate address; materialize
   the offset in R11 and add it.  */
42840 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
42842 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
42843 emit_move_insn (tmp2, GEN_INT (vcall_offset));
42844 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
42847 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
42848 if (Pmode != ptr_mode)
42849 emit_insn (gen_addsi_1_zext (this_reg,
42850 gen_rtx_REG (ptr_mode,
42854 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
42857 /* If necessary, drop THIS back to its stack slot. */
42858 if (this_reg && this_reg != this_param)
42859 emit_move_insn (this_param, this_reg);
/* Compute the target address; non-local functions under PIC go through
   the GOT (GOTPCREL on 64-bit, GOT+%ecx on 32-bit, Mach-O indirect on
   Darwin).  */
42861 fnaddr = XEXP (DECL_RTL (function), 0);
42864 if (!flag_pic || targetm.binds_local_p (function)
42869 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
42870 tmp = gen_rtx_CONST (Pmode, tmp);
42871 fnaddr = gen_const_mem (Pmode, tmp);
42876 if (!flag_pic || targetm.binds_local_p (function))
42879 else if (TARGET_MACHO)
42881 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
42882 fnaddr = XEXP (fnaddr, 0);
42884 #endif /* TARGET_MACHO */
42887 tmp = gen_rtx_REG (Pmode, CX_REG);
42888 output_set_got (tmp, NULL_RTX);
42890 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
42891 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
42892 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
42893 fnaddr = gen_const_mem (Pmode, fnaddr);
42897 /* Our sibling call patterns do not allow memories, because we have no
42898 predicate that can distinguish between frame and non-frame memory.
42899 For our purposes here, we can get away with (ab)using a jump pattern,
42900 because we're going to do no optimization. */
42901 if (MEM_P (fnaddr))
42903 if (sibcall_insn_operand (fnaddr, word_mode))
42905 fnaddr = XEXP (DECL_RTL (function), 0);
42906 tmp = gen_rtx_MEM (QImode, fnaddr);
42907 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
42908 tmp = emit_call_insn (tmp);
42909 SIBLING_CALL_P (tmp) = 1;
42912 emit_jump_insn (gen_indirect_jump (fnaddr));
42916 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
42918 // CM_LARGE_PIC always uses pseudo PIC register which is
42919 // uninitialized. Since FUNCTION is local and calling it
42920 // doesn't go through PLT, we use scratch register %r11 as
42921 // PIC register and initialize it here.
42922 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
42923 ix86_init_large_pic_reg (tmp_regno);
42924 fnaddr = legitimize_pic_address (fnaddr,
42925 gen_rtx_REG (Pmode, tmp_regno));
/* If the address still isn't a valid sibcall operand, move it into the
   scratch register (zero-extending sub-word addresses first).  */
42928 if (!sibcall_insn_operand (fnaddr, word_mode))
42930 tmp = gen_rtx_REG (word_mode, tmp_regno);
42931 if (GET_MODE (fnaddr) != word_mode)
42932 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
42933 emit_move_insn (tmp, fnaddr);
42937 tmp = gen_rtx_MEM (QImode, fnaddr);
42938 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
42939 tmp = emit_call_insn (tmp);
42940 SIBLING_CALL_P (tmp) = 1;
42944 /* Emit just enough of rest_of_compilation to get the insns emitted.
42945 Note that use_thunk calls assemble_start_function et al. */
42946 insn = get_insns ();
42947 shorten_branches (insn);
42948 final_start_function (insn, file, 1);
42949 final (insn, file, 1);
42950 final_end_function ();
/* TARGET_ASM_FILE_START hook: emit the standard file prologue plus any
   target-specific directives (.code16gcc, Darwin prologue, .version,
   __fltused, Intel syntax selection).  NOTE(review): the conditions
   guarding .code16gcc and darwin_file_start are on elided lines.  */
42954 x86_file_start (void)
42956 default_file_start ();
42958 fputs ("\t.code16gcc\n", asm_out_file);
42960 darwin_file_start ();
42962 if (X86_FILE_START_VERSION_DIRECTIVE)
42963 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
42964 if (X86_FILE_START_FLTUSED)
42965 fputs ("\t.global\t__fltused\n", asm_out_file);
42966 if (ix86_asm_dialect == ASM_INTEL)
42967 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* ADJUST_FIELD_ALIGN worker: unless 64-bit or -malign-double is in
   effect, cap the alignment of double/integer-class fields at 32 bits
   to match the traditional ia32 struct layout.  NOTE(review): the
   `return computed' path for the early-out is on an elided line.  */
42971 x86_field_alignment (tree field, int computed)
42974 tree type = TREE_TYPE (field);
42976 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
/* Look through arrays so an array of doubles is treated like its
   element type.  */
42978 mode = TYPE_MODE (strip_array_types (type));
42979 if (mode == DFmode || mode == DCmode
42980 || GET_MODE_CLASS (mode) == MODE_INT
42981 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
42982 return MIN (32, computed);
42986 /* Print call to TARGET to FILE. */
/* With -mnop-mcount, emit a 5-byte nop of the same length as the call
   so the profiler call site can be patched in later; the "1:" label is
   referenced by the __mcount_loc section.  */
42989 x86_print_call_or_nop (FILE *file, const char *target)
42991 if (flag_nop_mcount)
42992 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
42994 fprintf (file, "1:\tcall\t%s\n", target);
42997 /* Output assembler code to FILE to increment profiler label # LABELNO
42998 for profiling a function entry. */
/* Emits the mcount call (or nop) appropriate for 64-bit/PIC/non-PIC,
   optionally bumping a per-label profile counter first, and records
   the call site in __mcount_loc when -mrecord-mcount is given.
   NOTE(review): the TARGET_64BIT / flag_pic branch structure is partly
   elided in this listing.  */
43000 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
43002 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
43006 #ifndef NO_PROFILE_COUNTERS
43007 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
43010 if (!TARGET_PECOFF && flag_pic)
43011 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
43013 x86_print_call_or_nop (file, mcount_name);
43017 #ifndef NO_PROFILE_COUNTERS
43018 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
43021 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
43025 #ifndef NO_PROFILE_COUNTERS
43026 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
43029 x86_print_call_or_nop (file, mcount_name);
/* The "1b" back-reference points at the label emitted before the call
   or nop above.  */
43032 if (flag_record_mcount)
43034 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
43035 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
43036 fprintf (file, "\t.previous\n");
43040 /* We don't have exact information about the insn sizes, but we may assume
43041 quite safely that we are informed about all 1 byte insns and memory
43042 address sizes. This is enough to eliminate unnecessary padding in
/* Returns a conservative lower bound on INSN's encoded length in
   bytes, used by the 4-jumps-per-16-bytes padding heuristic below.
   NOTE(review): several `return' lines of this function are elided.  */
43046 min_insn_size (rtx_insn *insn)
43050 if (!INSN_P (insn) || !active_insn_p (insn))
43053 /* Discard alignments we've emit and jump instructions. */
43054 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
43055 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
43058 /* Important case - calls are always 5 bytes.
43059 It is common to have many calls in the row. */
43061 && symbolic_reference_mentioned_p (PATTERN (insn))
43062 && !SIBLING_CALL_P (insn))
43064 len = get_attr_length (insn);
43068 /* For normal instructions we rely on get_attr_length being exact,
43069 with a few exceptions. */
43070 if (!JUMP_P (insn))
43072 enum attr_type type = get_attr_type (insn);
/* Inline asm length cannot be trusted; treat it specially.  */
43077 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
43078 || asm_noperands (PATTERN (insn)) >= 0)
43085 /* Otherwise trust get_attr_length. */
/* For jumps, account for a symbolic target needing a full 4-byte
   displacement even if the address-length attribute says less.  */
43089 l = get_attr_length_address (insn);
43090 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
43099 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43101 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Scans the insn stream keeping a sliding window [START, INSN]; when a
   window smaller than 16 bytes contains four jumps, pads before INSN
   so that at most three jumps can share one 16-byte fetch line.  */
43105 ix86_avoid_jump_mispredicts (void)
43107 rtx_insn *insn, *start = get_insns ();
43108 int nbytes = 0, njumps = 0;
43109 bool isjump = false;
43111 /* Look for all minimal intervals of instructions containing 4 jumps.
43112 The intervals are bounded by START and INSN. NBYTES is the total
43113 size of instructions in the interval including INSN and not including
43114 START. When the NBYTES is smaller than 16 bytes, it is possible
43115 that the end of START and INSN ends up in the same 16byte page.
43117 The smallest offset in the page INSN can start is the case where START
43118 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
43119 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
43121 Don't consider asm goto as jump, while it can contain a jump, it doesn't
43122 have to, control transfer to label(s) can be performed through other
43123 means, and also we estimate minimum length of all asm stmts as 0. */
43124 for (insn = start; insn; insn = NEXT_INSN (insn))
43128 if (LABEL_P (insn))
43130 int align = label_to_alignment (insn);
43131 int max_skip = label_to_max_skip (insn);
43135 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
43136 already in the current 16 byte page, because otherwise
43137 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
43138 bytes to reach 16 byte boundary. */
43140 || (align <= 3 && max_skip != (1 << align) - 1))
43143 fprintf (dump_file, "Label %i with max_skip %i\n",
43144 INSN_UID (insn), max_skip);
/* An aligned label resets part of the window: shrink from the front
   until the remaining bytes plus the possible skip fit in 16.  */
43147 while (nbytes + max_skip >= 16)
43149 start = NEXT_INSN (start);
43150 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43152 njumps--, isjump = true;
43155 nbytes -= min_insn_size (start);
43161 min_size = min_insn_size (insn);
43162 nbytes += min_size;
43164 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
43165 INSN_UID (insn), min_size);
43166 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
/* Keep at most 4 jumps in the window by advancing START.  */
43174 start = NEXT_INSN (start);
43175 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43177 njumps--, isjump = true;
43180 nbytes -= min_insn_size (start);
43182 gcc_assert (njumps >= 0);
43184 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
43185 INSN_UID (start), INSN_UID (insn), nbytes);
/* Four jumps (3 prior + INSN) would share a 16-byte page: pad so INSN
   starts in the next page.  */
43187 if (njumps == 3 && isjump && nbytes < 16)
43189 int padsize = 15 - nbytes + min_insn_size (insn);
43192 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
43193 INSN_UID (insn), padsize);
43194 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
43200 /* AMD Athlon works faster
43201 when RET is not destination of conditional jump or directly preceded
43202 by other jump instruction. We avoid the penalty by inserting NOP just
43203 before the RET instructions in such cases. */
/* Walks every edge into the exit block and, when the return insn is a
   branch target or immediately follows a (conditional) jump, replaces
   it with the long-return pattern that carries a leading prefix/NOP.
   NOTE(review): the `replace = true' assignments and loop braces are on
   elided lines.  */
43205 ix86_pad_returns (void)
43210 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43212 basic_block bb = e->src;
43213 rtx_insn *ret = BB_END (bb);
43215 bool replace = false;
43217 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
43218 || optimize_bb_for_size_p (bb))
/* Find the nearest active insn or label before the return.  */
43220 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
43221 if (active_insn_p (prev) || LABEL_P (prev))
43223 if (prev && LABEL_P (prev))
/* A label directly before RET: pad if any non-fallthru edge jumps
   here, i.e. RET is a branch target.  */
43228 FOR_EACH_EDGE (e, ei, bb->preds)
43229 if (EDGE_FREQUENCY (e) && e->src->index >= 0
43230 && !(e->flags & EDGE_FALLTHRU))
43238 prev = prev_active_insn (ret);
43240 && ((JUMP_P (prev) && any_condjump_p (prev))
43243 /* Empty functions get branch mispredict even when
43244 the jump destination is not visible to us. */
43245 if (!prev && !optimize_function_for_size_p (cfun))
43250 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43256 /* Count the minimum number of instructions in BB. Return 4 if the
43257 number of instructions >= 4. */
/* Counts real (non-debug, non-USE/CLOBBER) insns, saturating at 4;
   return insns are skipped since they only occur in exit blocks.  */
43260 ix86_count_insn_bb (basic_block bb)
43263 int insn_count = 0;
43265 /* Count number of instructions in this block. Return 4 if the number
43266 of instructions >= 4. */
43267 FOR_BB_INSNS (bb, insn)
43269 /* Only happen in exit blocks. */
43271 && ANY_RETURN_P (PATTERN (insn)))
43274 if (NONDEBUG_INSN_P (insn)
43275 && GET_CODE (PATTERN (insn)) != USE
43276 && GET_CODE (PATTERN (insn)) != CLOBBER)
/* Saturate: callers only care whether the count reaches 4.  */
43279 if (insn_count >= 4)
43288 /* Count the minimum number of instructions in code path in BB.
43289 Return 4 if the number of instructions >= 4. */
/* BB is known to feed the exit block.  Looks one or two blocks back
   toward the entry block, takes the minimum predecessor count, and
   adds BB's own count -- all saturated at 4.  */
43292 ix86_count_insn (basic_block bb)
43296 int min_prev_count;
43298 /* Only bother counting instructions along paths with no
43299 more than 2 basic blocks between entry and exit. Given
43300 that BB has an edge to exit, determine if a predecessor
43301 of BB has an edge from entry. If so, compute the number
43302 of instructions in the predecessor block. If there
43303 happen to be multiple such blocks, compute the minimum. */
43304 min_prev_count = 4;
43305 FOR_EACH_EDGE (e, ei, bb->preds)
43308 edge_iterator prev_ei;
43310 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
/* BB is directly reachable from entry: nothing precedes it.  */
43312 min_prev_count = 0;
43315 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43317 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43319 int count = ix86_count_insn_bb (e->src);
43320 if (count < min_prev_count)
43321 min_prev_count = count;
43327 if (min_prev_count < 4)
43328 min_prev_count += ix86_count_insn_bb (bb);
43330 return min_prev_count;
43333 /* Pad short function to 4 instructions. */
/* For each return path shorter than 4 insns, inserts NOPs just before
   the epilogue so the whole function body reaches the 4-instruction
   minimum (a TARGET_PAD_SHORT_FUNCTION tuning).  */
43336 ix86_pad_short_function (void)
43341 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43343 rtx_insn *ret = BB_END (e->src);
43344 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43346 int insn_count = ix86_count_insn (e->src);
43348 /* Pad short function. */
43349 if (insn_count < 4)
43351 rtx_insn *insn = ret;
43353 /* Find epilogue. */
/* Walk back to the NOTE_INSN_EPILOGUE_BEG note; the NOPs go before
   the epilogue, not before the return itself.  */
43356 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43357 insn = PREV_INSN (insn);
43362 /* Two NOPs count as one instruction. */
43363 insn_count = 2 * (4 - insn_count);
43364 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
43370 /* Fix up a Windows system unwinder issue. If an EH region falls through into
43371 the epilogue, the Windows system unwinder will apply epilogue logic and
43372 produce incorrect offsets. This can be avoided by adding a nop between
43373 the last insn that can throw and the first insn of the epilogue. */
43376 ix86_seh_fixup_eh_fallthru (void)
43381 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43383 rtx_insn *insn, *next;
43385 /* Find the beginning of the epilogue. */
43386 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43387 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43392 /* We only care about preceding insns that can throw. */
43393 insn = prev_active_insn (insn);
43394 if (insn == NULL || !can_throw_internal (insn))
43397 /* Do not separate calls from their debug information. */
/* Skip past VAR_LOCATION / CALL_ARG_LOCATION notes so the nop lands
   after the debug notes attached to the throwing call.  */
43398 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43400 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43401 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
43406 emit_insn_after (gen_nops (const1_rtx), insn);
43410 /* Implement machine specific optimizations. We implement padding of returns
43411 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* Machine-dependent reorg pass body (the defining line is elided in
   this excerpt; presumably ix86_reorg -- TODO confirm).  Runs the SEH
   fallthru fixup, then the speed-only padding passes.  */
43415 /* We are freeing block_for_insn in the toplev to keep compatibility
43416 with old MDEP_REORGS that are not CFG based. Recompute it now. */
43417 compute_bb_for_insn ();
43419 if (TARGET_SEH && current_function_has_exception_handlers ())
43420 ix86_seh_fixup_eh_fallthru ();
43422 if (optimize && optimize_function_for_speed_p (cfun))
/* The two padding strategies are mutually exclusive tunings.  */
43424 if (TARGET_PAD_SHORT_FUNCTION)
43425 ix86_pad_short_function ();
43426 else if (TARGET_PAD_RETURNS)
43427 ix86_pad_returns ();
43428 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43429 if (TARGET_FOUR_JUMP_LIMIT)
43430 ix86_avoid_jump_mispredicts ();
43435 /* Return nonzero when QImode register that must be represented via REX prefix
/* Scans INSN's extracted operands for a general register whose number
   is outside the classic QImode-addressable set (AX/BX/CX/DX), i.e.
   one that needs a REX prefix when accessed as a byte register.  */
43438 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
43441 extract_insn_cached (insn);
43442 for (i = 0; i < recog_data.n_operands; i++)
43443 if (GENERAL_REG_P (recog_data.operand[i])
43444 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
43449 /* Return true when INSN mentions register that must be encoded using REX
/* Walks every sub-rtx of INSN's pattern looking for a hard register in
   the REX integer or REX SSE ranges (r8-r15 / xmm8-xmm15).  */
43452 x86_extended_reg_mentioned_p (rtx insn)
43454 subrtx_iterator::array_type array;
43455 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43457 const_rtx x = *iter;
43459 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43465 /* If profitable, negate (without causing overflow) integer constant
43466 of mode MODE at location LOC. Return true in this case. */
/* Used to turn `add $-4' into `sub $4' (and vice versa) when the
   negated immediate encodes no larger.  NOTE(review): the mode switch
   and the masking of VAL are on elided lines.  */
43468 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
43472 if (!CONST_INT_P (*loc))
43478 /* DImode x86_64 constants must fit in 32 bits. */
43479 gcc_assert (x86_64_immediate_operand (*loc, mode));
43490 gcc_unreachable ();
43493 /* Avoid overflows. */
/* Negating the sign-bit value would overflow; leave it alone.  */
43494 if (mode_signbit_p (mode, *loc))
43497 val = INTVAL (*loc);
43499 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
43500 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
43501 if ((val < 0 && val != -128)
43504 *loc = GEN_INT (-val);
43511 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
43512 optabs would emit if we didn't have TFmode patterns. */
/* operands[0] = FP result, operands[1] = unsigned integer source.
   Non-negative inputs use a plain signed conversion; negative inputs
   (top bit set) are halved with the low bit folded in, converted, then
   doubled -- preserving the value while staying in signed range.  */
43515 x86_emit_floatuns (rtx operands[2])
43517 rtx_code_label *neglab, *donelab;
43518 rtx i0, i1, f0, in, out;
43519 machine_mode mode, inmode;
43521 inmode = GET_MODE (operands[1]);
43522 gcc_assert (inmode == SImode || inmode == DImode);
43525 in = force_reg (inmode, operands[1]);
43526 mode = GET_MODE (out);
43527 neglab = gen_label_rtx ();
43528 donelab = gen_label_rtx ();
43529 f0 = gen_reg_rtx (mode);
/* Fast path: value is non-negative as a signed number.  */
43531 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43533 expand_float (out, in, 0);
43535 emit_jump_insn (gen_jump (donelab));
43538 emit_label (neglab);
/* i0 = (in >> 1) | (in & 1): halve while keeping rounding parity.  */
43540 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43542 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43544 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43546 expand_float (f0, i0, 0);
/* out = f0 + f0 restores the original magnitude.  */
43548 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
43550 emit_label (donelab);
43553 static bool canonicalize_perm (struct expand_vec_perm_d *d);
43554 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43555 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43556 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43558 /* Get a vector mode of the same size as the original but with elements
43559 twice as wide. This is only guaranteed to apply to integral vectors. */
/* E.g. V16QImode -> V8HImode.  The asserts verify the genmodes
   ordering assumption: half the elements, same total size.
   NOTE(review): the `return n;' line is elided in this excerpt.  */
43561 static inline machine_mode
43562 get_mode_wider_vector (machine_mode o)
43564 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
43565 machine_mode n = GET_MODE_WIDER_MODE (o);
43566 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43567 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43571 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
43572 fill target with val via vec_duplicate. */
/* First tries (set target (vec_duplicate val)) as-is; if no insn
   pattern matches, re-tries with VAL forced into a register, splicing
   the forcing sequence in before the insn.  Returns whether a pattern
   was finally recognized (the return is on an elided line).  */
43575 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
43581 /* First attempt to recognize VAL as-is. */
43582 dup = gen_rtx_VEC_DUPLICATE (mode, val);
43583 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
43584 if (recog_memoized (insn) < 0)
43587 /* If that fails, force VAL into a register. */
43590 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
43591 seq = get_insns ();
43594 emit_insn_before (seq, insn);
43596 ok = recog_memoized (insn) >= 0;
43602 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43603 with all elements equal to VAR. Return true if successful. */
/* Dispatches per vector MODE (the switch and case labels are elided in
   this excerpt): direct vec_duplicate where a pattern exists, a
   pshufb-style permutation for narrow SSE elements, widening+recursion
   for small elements without SSSE3, and half-mode recursion plus
   VEC_CONCAT for 256/512-bit modes.  */
43606 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
43607 rtx target, rtx val)
43631 return ix86_vector_duplicate_value (mode, target, val);
43636 if (TARGET_SSE || TARGET_3DNOW_A)
/* Duplicate an HImode value by broadcasting its SImode low part.  */
43640 val = gen_lowpart (SImode, val);
43641 x = gen_rtx_TRUNCATE (HImode, val);
43642 x = gen_rtx_VEC_DUPLICATE (mode, x);
43643 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43655 return ix86_vector_duplicate_value (mode, target, val);
/* Byte/short broadcast via a one-operand permutation (pshufb).  */
43659 struct expand_vec_perm_d dperm;
43663 memset (&dperm, 0, sizeof (dperm));
43664 dperm.target = target;
43665 dperm.vmode = mode;
43666 dperm.nelt = GET_MODE_NUNITS (mode);
43667 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
43668 dperm.one_operand_p = true;
43670 /* Extend to SImode using a paradoxical SUBREG. */
43671 tmp1 = gen_reg_rtx (SImode);
43672 emit_move_insn (tmp1, gen_lowpart (SImode, val));
43674 /* Insert the SImode value as low element of a V4SImode vector. */
43675 tmp2 = gen_reg_rtx (V4SImode);
43676 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
43677 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
43679 ok = (expand_vec_perm_1 (&dperm)
43680 || expand_vec_perm_broadcast_1 (&dperm));
43688 return ix86_vector_duplicate_value (mode, target, val);
43695 /* Replicate the value once into the next wider mode and recurse. */
43697 machine_mode smode, wsmode, wvmode;
43700 smode = GET_MODE_INNER (mode);
43701 wvmode = get_mode_wider_vector (mode);
43702 wsmode = GET_MODE_INNER (wvmode);
/* Pack two copies of VAL into one wider element: (val << bits) | val.  */
43704 val = convert_modes (wsmode, smode, val, true);
43705 x = expand_simple_binop (wsmode, ASHIFT, val,
43706 GEN_INT (GET_MODE_BITSIZE (smode)),
43707 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43708 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
43710 x = gen_reg_rtx (wvmode);
43711 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
43713 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
43720 return ix86_vector_duplicate_value (mode, target, val);
/* 256-bit integer modes: broadcast into a 128-bit half, then concat.  */
43723 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
43724 rtx x = gen_reg_rtx (hvmode);
43726 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43729 x = gen_rtx_VEC_CONCAT (mode, x, x);
43730 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* 512-bit narrow-element modes: direct only with AVX512BW, otherwise
   build a 256-bit half and concat.  */
43736 if (TARGET_AVX512BW)
43737 return ix86_vector_duplicate_value (mode, target, val);
43740 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
43741 rtx x = gen_reg_rtx (hvmode);
43743 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43746 x = gen_rtx_VEC_CONCAT (mode, x, x);
43747 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43756 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43757 whose ONE_VAR element is VAR, and other elements are zero. Return true
/* Strategy selection per MODE (switch labels elided in this excerpt):
   prefer a hardware vector-set on a zeroed target; otherwise build the
   value in element 0 (VEC_CONCAT with zero, or VEC_MERGE of a
   duplicate) and shuffle it to position ONE_VAR; narrow element modes
   zero-extend to SImode and recurse.  */
43761 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
43762 rtx target, rtx var, int one_var)
43764 machine_mode vsimode;
43767 bool use_vector_set = false;
43772 /* For SSE4.1, we normally use vector set. But if the second
43773 element is zero and inter-unit moves are OK, we use movq
43775 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
43776 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
43782 use_vector_set = TARGET_SSE4_1;
43785 use_vector_set = TARGET_SSE2;
43788 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
43795 use_vector_set = TARGET_AVX;
43798 /* Use ix86_expand_vector_set in 64bit mode only. */
43799 use_vector_set = TARGET_AVX && TARGET_64BIT;
43805 if (use_vector_set)
/* Zero the whole vector, then insert VAR at ONE_VAR.  */
43807 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
43808 var = force_reg (GET_MODE_INNER (mode), var);
43809 ix86_expand_vector_set (mmx_ok, target, var, one_var);
/* Two-element vectors: VAR in the low half, zero in the high half.  */
43825 var = force_reg (GET_MODE_INNER (mode), var);
43826 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
43827 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* Work in a fresh pseudo when TARGET is not a pseudo register, since
   the shuffle below reads and writes it.  */
43832 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
43833 new_target = gen_reg_rtx (mode);
43835 new_target = target;
43836 var = force_reg (GET_MODE_INNER (mode), var);
/* (vec_merge (vec_duplicate var) 0 1): VAR in element 0, zero
   elsewhere.  */
43837 x = gen_rtx_VEC_DUPLICATE (mode, var);
43838 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
43839 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
43842 /* We need to shuffle the value to the correct position, so
43843 create a new pseudo to store the intermediate result. */
43845 /* With SSE2, we can use the integer shuffle insns. */
43846 if (mode != V4SFmode && TARGET_SSE2)
/* pshufd selector: lane ONE_VAR reads element 0 (the value), every
   other lane reads a zero element.  */
43848 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
43850 GEN_INT (one_var == 1 ? 0 : 1),
43851 GEN_INT (one_var == 2 ? 0 : 1),
43852 GEN_INT (one_var == 3 ? 0 : 1)));
43853 if (target != new_target)
43854 emit_move_insn (target, new_target);
43858 /* Otherwise convert the intermediate result to V4SFmode and
43859 use the SSE1 shuffle instructions. */
43860 if (mode != V4SFmode)
43862 tmp = gen_reg_rtx (V4SFmode);
43863 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
/* shufps: the +4 selectors pick from the second source operand.  */
43868 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
43870 GEN_INT (one_var == 1 ? 0 : 1),
43871 GEN_INT (one_var == 2 ? 0+4 : 1+4),
43872 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
43874 if (mode != V4SFmode)
43875 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
43876 else if (tmp != target)
43877 emit_move_insn (target, tmp);
43879 else if (target != new_target)
43880 emit_move_insn (target, new_target);
43885 vsimode = V4SImode;
43891 vsimode = V2SImode;
43897 /* Zero extend the variable element to SImode and recurse. */
43898 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
43900 x = gen_reg_rtx (vsimode);
43901 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
43903 gcc_unreachable ();
43905 emit_move_insn (target, gen_lowpart (mode, x));
43913 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43914 consisting of the values in VALS. It is known that all elements
43915 except ONE_VAR are constants. Return true if successful. */
/* General plan: materialize VALS with the variable slot zeroed as a
   constant vector, then insert the variable element.  QImode elements
   get a special path because there is no easy byte insert: the byte is
   merged with its neighbour into an HImode value first.
   NOTE(review): the mode switch labels are elided in this excerpt.  */
43918 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
43919 rtx target, rtx vals, int one_var)
43921 rtx var = XVECEXP (vals, 0, one_var);
43922 machine_mode wmode;
/* CONST_VEC = VALS with element ONE_VAR replaced by zero.  */
43925 const_vec = copy_rtx (vals);
43926 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
43927 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
43935 /* For the two element vectors, it's just as easy to use
43936 the general case. */
43940 /* Use ix86_expand_vector_set in 64bit mode only. */
43963 /* There's no way to set one QImode entry easily. Combine
43964 the variable value with its adjacent constant value, and
43965 promote to an HImode set. */
43966 x = XVECEXP (vals, 0, one_var ^ 1);
/* Even index: variable byte goes in the high half of the HImode pair;
   odd index: in the low half with the constant shifted up.  */
43969 var = convert_modes (HImode, QImode, var, true);
43970 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
43971 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43972 x = GEN_INT (INTVAL (x) & 0xff);
43976 var = convert_modes (HImode, QImode, var, true);
43977 x = gen_int_mode (INTVAL (x) << 8, HImode);
43979 if (x != const0_rtx)
43980 var = expand_simple_binop (HImode, IOR, var, x, var,
43981 1, OPTAB_LIB_WIDEN);
43983 x = gen_reg_rtx (wmode);
43984 emit_move_insn (x, gen_lowpart (wmode, const_vec));
/* one_var >> 1 converts the byte index to the HImode lane index.  */
43985 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
43987 emit_move_insn (target, gen_lowpart (mode, x));
/* Default path: load the constant vector, then overwrite slot
   ONE_VAR.  */
43994 emit_move_insn (target, const_vec);
43995 ix86_expand_vector_set (mmx_ok, target, var, one_var);
43999 /* A subroutine of ix86_expand_vector_init_general. Use vector
44000 concatenate to handle the most general case: all values variable,
44001 and none identical. */
44004 ix86_expand_vector_init_concat (machine_mode mode,
44005 rtx target, rtx *ops, int n)
/* cmode: mode of the immediate halves being concatenated;
   hmode/gmode: intermediate modes for the recursive 4-/8-way cases.  */
44007 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
44008 rtx first[16], second[8], third[4];
44060 gcc_unreachable ();
/* n == 2: a single VEC_CONCAT of the two operands does the job.
   Force both into registers first.  */
44063 if (!register_operand (ops[1], cmode))
44064 ops[1] = force_reg (cmode, ops[1]);
44065 if (!register_operand (ops[0], cmode))
44066 ops[0] = force_reg (cmode, ops[0]);
44067 emit_insn (gen_rtx_SET (VOIDmode, target,
44068 gen_rtx_VEC_CONCAT (mode, ops[0],
44088 gcc_unreachable ();
44112 gcc_unreachable ();
44130 gcc_unreachable ();
44135 /* FIXME: We process inputs backward to help RA. PR 36222. */
/* Pair up adjacent scalar operands into half-width vectors
   (recursing through ix86_expand_vector_init).  */
44138 for (; i > 0; i -= 2, j--)
44140 first[j] = gen_reg_rtx (cmode);
44141 v = gen_rtvec (2, ops[i - 1], ops[i]);
44142 ix86_expand_vector_init (false, first[j],
44143 gen_rtx_PARALLEL (cmode, v));
/* Three-level recursion: first -> second (hmode) -> third (gmode)
   before the final concat into TARGET.  */
44149 gcc_assert (hmode != VOIDmode);
44150 gcc_assert (gmode != VOIDmode);
44151 for (i = j = 0; i < n; i += 2, j++)
44153 second[j] = gen_reg_rtx (hmode);
44154 ix86_expand_vector_init_concat (hmode, second [j],
44158 for (i = j = 0; i < n; i += 2, j++)
44160 third[j] = gen_reg_rtx (gmode);
44161 ix86_expand_vector_init_concat (gmode, third[j],
44165 ix86_expand_vector_init_concat (mode, target, third, n);
/* Two-level recursion: combine pairs into hmode vectors, then
   concat those into TARGET.  */
44169 gcc_assert (hmode != VOIDmode);
44170 for (i = j = 0; i < n; i += 2, j++)
44172 second[j] = gen_reg_rtx (hmode);
44173 ix86_expand_vector_init_concat (hmode, second [j],
44177 ix86_expand_vector_init_concat (mode, target, second, n);
/* Single-level: the pairs built above are concatenated directly.  */
44180 ix86_expand_vector_init_concat (mode, target, first, n);
44184 gcc_unreachable ();
44188 /* A subroutine of ix86_expand_vector_init_general. Use vector
44189 interleave to handle the most general case: all values variable,
44190 and none identical. */
44193 ix86_expand_vector_init_interleave (machine_mode mode,
44194 rtx target, rtx *ops, int n)
44196 machine_mode first_imode, second_imode, third_imode, inner_mode;
/* Insn generators selected per element mode below:
   gen_load_even inserts a scalar into an even lane; the interleave
   generators merge low halves at successively wider element sizes.  */
44199 rtx (*gen_load_even) (rtx, rtx, rtx);
44200 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
44201 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
/* HImode-element case (V8HI).  */
44206 gen_load_even = gen_vec_setv8hi;
44207 gen_interleave_first_low = gen_vec_interleave_lowv4si;
44208 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44209 inner_mode = HImode;
44210 first_imode = V4SImode;
44211 second_imode = V2DImode;
44212 third_imode = VOIDmode;
/* QImode-element case (V16QI) needs one more interleave level.  */
44215 gen_load_even = gen_vec_setv16qi;
44216 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
44217 gen_interleave_second_low = gen_vec_interleave_lowv4si;
44218 inner_mode = QImode;
44219 first_imode = V8HImode;
44220 second_imode = V4SImode;
44221 third_imode = V2DImode;
44224 gcc_unreachable ();
/* Pack each pair of scalars (odd in lane 0, even in lane 1) into one
   vector, leaving N vectors in ops[] in FIRST_IMODE.  */
44227 for (i = 0; i < n; i++)
44229 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
44230 op0 = gen_reg_rtx (SImode);
44231 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
44233 /* Insert the SImode value as low element of V4SImode vector. */
44234 op1 = gen_reg_rtx (V4SImode);
44235 op0 = gen_rtx_VEC_MERGE (V4SImode,
44236 gen_rtx_VEC_DUPLICATE (V4SImode,
44238 CONST0_RTX (V4SImode),
44240 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
44242 /* Cast the V4SImode vector back to a vector in orignal mode. */
44243 op0 = gen_reg_rtx (mode);
44244 emit_move_insn (op0, gen_lowpart (mode, op1));
44246 /* Load even elements into the second position. */
44247 emit_insn (gen_load_even (op0,
44248 force_reg (inner_mode,
44252 /* Cast vector to FIRST_IMODE vector. */
44253 ops[i] = gen_reg_rtx (first_imode);
44254 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44257 /* Interleave low FIRST_IMODE vectors. */
44258 for (i = j = 0; i < n; i += 2, j++)
44260 op0 = gen_reg_rtx (first_imode);
44261 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44263 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
44264 ops[j] = gen_reg_rtx (second_imode);
44265 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44268 /* Interleave low SECOND_IMODE vectors. */
44269 switch (second_imode)
/* V4SImode: one more interleave level, then fall through to the
   V2DImode merge below.  */
44272 for (i = j = 0; i < n / 2; i += 2, j++)
44274 op0 = gen_reg_rtx (second_imode);
44275 emit_insn (gen_interleave_second_low (op0, ops[i],
44278 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44280 ops[j] = gen_reg_rtx (third_imode);
44281 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
44283 second_imode = V2DImode;
44284 gen_interleave_second_low = gen_vec_interleave_lowv2di;
/* Final merge of the two remaining vectors, then view the result
   in the original mode.  */
44288 op0 = gen_reg_rtx (second_imode);
44289 emit_insn (gen_interleave_second_low (op0, ops[0],
44292 /* Cast the SECOND_IMODE vector back to a vector on original
44294 emit_insn (gen_rtx_SET (VOIDmode, target,
44295 gen_lowpart (mode, op0)));
44299 gcc_unreachable ();
44303 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
44304 all values variable, and none identical. */
44307 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44308 rtx target, rtx vals)
44310 rtx ops[64], op0, op1, op2, op3, op4, op5;
44311 machine_mode half_mode = VOIDmode;
44312 machine_mode quarter_mode = VOIDmode;
44319 if (!mmx_ok && !TARGET_SSE)
/* Wide-element vectors: build via recursive concatenation.  */
44335 n = GET_MODE_NUNITS (mode);
44336 for (i = 0; i < n; i++)
44337 ops[i] = XVECEXP (vals, 0, i);
44338 ix86_expand_vector_init_concat (mode, target, ops, n);
/* 256-bit byte/word vectors: build the two 128-bit halves by
   interleave, then concatenate them.  */
44342 half_mode = V16QImode;
44346 half_mode = V8HImode;
44350 n = GET_MODE_NUNITS (mode);
44351 for (i = 0; i < n; i++)
44352 ops[i] = XVECEXP (vals, 0, i);
44353 op0 = gen_reg_rtx (half_mode);
44354 op1 = gen_reg_rtx (half_mode);
44355 ix86_expand_vector_init_interleave (half_mode, op0, ops,
44357 ix86_expand_vector_init_interleave (half_mode, op1,
44358 &ops [n >> 1], n >> 2);
44359 emit_insn (gen_rtx_SET (VOIDmode, target,
44360 gen_rtx_VEC_CONCAT (mode, op0, op1)));
/* 512-bit byte/word vectors: build four 128-bit quarters, pair them
   into two 256-bit halves, then concatenate into TARGET.  */
44364 quarter_mode = V16QImode;
44365 half_mode = V32QImode;
44369 quarter_mode = V8HImode;
44370 half_mode = V16HImode;
44374 n = GET_MODE_NUNITS (mode);
44375 for (i = 0; i < n; i++)
44376 ops[i] = XVECEXP (vals, 0, i);
44377 op0 = gen_reg_rtx (quarter_mode);
44378 op1 = gen_reg_rtx (quarter_mode);
44379 op2 = gen_reg_rtx (quarter_mode);
44380 op3 = gen_reg_rtx (quarter_mode);
44381 op4 = gen_reg_rtx (half_mode);
44382 op5 = gen_reg_rtx (half_mode);
44383 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44385 ix86_expand_vector_init_interleave (quarter_mode, op1,
44386 &ops [n >> 2], n >> 3);
44387 ix86_expand_vector_init_interleave (quarter_mode, op2,
44388 &ops [n >> 1], n >> 3);
44389 ix86_expand_vector_init_interleave (quarter_mode, op3,
44390 &ops [(n >> 1) | (n >> 2)], n >> 3);
44391 emit_insn (gen_rtx_SET (VOIDmode, op4,
44392 gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44393 emit_insn (gen_rtx_SET (VOIDmode, op5,
44394 gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44395 emit_insn (gen_rtx_SET (VOIDmode, target,
44396 gen_rtx_VEC_CONCAT (mode, op4, op5)));
44400 if (!TARGET_SSE4_1)
44408 /* Don't use ix86_expand_vector_init_interleave if we can't
44409 move from GPR to SSE register directly. */
44410 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44413 n = GET_MODE_NUNITS (mode);
44414 for (i = 0; i < n; i++)
44415 ops[i] = XVECEXP (vals, 0, i);
44416 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44424 gcc_unreachable ();
/* Fallback: assemble the vector one word-mode chunk at a time with
   shifts and IORs, then combine the words.  */
44428 int i, j, n_elts, n_words, n_elt_per_word;
44429 machine_mode inner_mode;
44430 rtx words[4], shift;
44432 inner_mode = GET_MODE_INNER (mode);
44433 n_elts = GET_MODE_NUNITS (mode);
44434 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44435 n_elt_per_word = n_elts / n_words;
44436 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
44438 for (i = 0; i < n_words; ++i)
44440 rtx word = NULL_RTX;
/* Elements are folded in from highest to lowest, each pass shifting
   the accumulated word left by one element width.  */
44442 for (j = 0; j < n_elt_per_word; ++j)
44444 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44445 elt = convert_modes (word_mode, inner_mode, elt, true);
44451 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44452 word, 1, OPTAB_LIB_WIDEN);
44453 word = expand_simple_binop (word_mode, IOR, word, elt,
44454 word, 1, OPTAB_LIB_WIDEN);
/* One word covers the whole vector: a lowpart move suffices.  */
44462 emit_move_insn (target, gen_lowpart (mode, words[0]));
44463 else if (n_words == 2)
/* Two words: clobber a fresh pseudo and fill its low/high parts.  */
44465 rtx tmp = gen_reg_rtx (mode);
44466 emit_clobber (tmp);
44467 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44468 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44469 emit_move_insn (target, tmp);
44471 else if (n_words == 4)
/* Four words: recurse as a V4SI init (only valid when word_mode is
   SImode, which the assert checks).  */
44473 rtx tmp = gen_reg_rtx (V4SImode);
44474 gcc_assert (word_mode == SImode);
44475 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44476 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44477 emit_move_insn (target, gen_lowpart (mode, tmp));
44480 gcc_unreachable ();
44484 /* Initialize vector TARGET via VALS. Suppress the use of MMX
44485 instructions unless MMX_OK is true. */
44488 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44490 machine_mode mode = GET_MODE (target);
44491 machine_mode inner_mode = GET_MODE_INNER (mode);
44492 int n_elts = GET_MODE_NUNITS (mode);
44493 int n_var = 0, one_var = -1;
44494 bool all_same = true, all_const_zero = true;
/* Classify the initializer: count variable elements (remembering the
   last one), and track whether everything is zero / identical.  */
44498 for (i = 0; i < n_elts; ++i)
44500 x = XVECEXP (vals, 0, i);
44501 if (!(CONST_INT_P (x)
44502 || GET_CODE (x) == CONST_DOUBLE
44503 || GET_CODE (x) == CONST_FIXED))
44504 n_var++, one_var = i;
44505 else if (x != CONST0_RTX (inner_mode))
44506 all_const_zero = false;
44507 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44511 /* Constants are best loaded from the constant pool. */
44514 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44518 /* If all values are identical, broadcast the value. */
44520 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44521 XVECEXP (vals, 0, 0)))
44524 /* Values where only one field is non-constant are best loaded from
44525 the pool and overwritten via move later. */
44529 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44530 XVECEXP (vals, 0, one_var),
44534 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* No special case applied: fall back to the fully general expander.  */
44538 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Insert scalar VAL into element ELT of vector TARGET in place.
   MMX_OK permits the use of MMX instructions.  */
44542 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44544 machine_mode mode = GET_MODE (target);
44545 machine_mode inner_mode = GET_MODE_INNER (mode);
44546 machine_mode half_mode;
44547 bool use_vec_merge = false;
/* Tables indexed by [mode-row][high half?] used by the 256-bit path
   below: extract a 128-bit half, modify it, re-insert it.  */
44549 static rtx (*gen_extract[6][2]) (rtx, rtx)
44551 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44552 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44553 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44554 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44555 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44556 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44558 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44560 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44561 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44562 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44563 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44564 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44565 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
/* Two-element vector: extract the untouched element and rebuild the
   whole vector with a VEC_CONCAT in the right order.  */
44575 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44576 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
44578 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44580 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44581 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44587 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
44591 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44592 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
44594 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44596 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44597 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44604 /* For the two element vectors, we implement a VEC_CONCAT with
44605 the extraction of the other element. */
44607 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
44608 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
44611 op0 = val, op1 = tmp;
44613 op0 = tmp, op1 = val;
44615 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
44616 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44621 use_vec_merge = TARGET_SSE4_1;
44628 use_vec_merge = true;
/* V4SF without SSE4.1: synthesize the insertion with interleave and
   shufps sequences, one sequence per target lane.  */
44632 /* tmp = target = A B C D */
44633 tmp = copy_to_reg (target);
44634 /* target = A A B B */
44635 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
44636 /* target = X A B B */
44637 ix86_expand_vector_set (false, target, val, 0);
44638 /* target = A X C D */
44639 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44640 const1_rtx, const0_rtx,
44641 GEN_INT (2+4), GEN_INT (3+4)));
44645 /* tmp = target = A B C D */
44646 tmp = copy_to_reg (target);
44647 /* tmp = X B C D */
44648 ix86_expand_vector_set (false, tmp, val, 0);
44649 /* target = A B X D */
44650 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44651 const0_rtx, const1_rtx,
44652 GEN_INT (0+4), GEN_INT (3+4)));
44656 /* tmp = target = A B C D */
44657 tmp = copy_to_reg (target);
44658 /* tmp = X B C D */
44659 ix86_expand_vector_set (false, tmp, val, 0);
44660 /* target = A B X D */
44661 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44662 const0_rtx, const1_rtx,
44663 GEN_INT (2+4), GEN_INT (0+4)));
44667 gcc_unreachable ();
44672 use_vec_merge = TARGET_SSE4_1;
44676 /* Element 0 handled by vec_merge below. */
44679 use_vec_merge = true;
44685 /* With SSE2, use integer shuffles to swap element 0 and ELT,
44686 store into element 0, then shuffle them back. */
/* order[] is the identity permutation with lanes 0 and ELT swapped;
   applying it twice restores the original ordering.  */
44690 order[0] = GEN_INT (elt);
44691 order[1] = const1_rtx;
44692 order[2] = const2_rtx;
44693 order[3] = GEN_INT (3);
44694 order[elt] = const0_rtx;
44696 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44697 order[1], order[2], order[3]));
44699 ix86_expand_vector_set (false, target, val, 0);
44701 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44702 order[1], order[2], order[3]));
44706 /* For SSE1, we have to reuse the V4SF code. */
44707 rtx t = gen_reg_rtx (V4SFmode);
44708 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
44709 emit_move_insn (target, gen_lowpart (mode, t));
44714 use_vec_merge = TARGET_SSE2;
44717 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44721 use_vec_merge = TARGET_SSE4_1;
/* 256-bit modes: pick the half mode matching the row order of the
   gen_extract/gen_insert tables above.  */
44728 half_mode = V16QImode;
44734 half_mode = V8HImode;
44740 half_mode = V4SImode;
44746 half_mode = V2DImode;
44752 half_mode = V4SFmode;
44758 half_mode = V2DFmode;
44764 /* Compute offset. */
44768 gcc_assert (i <= 1);
44770 /* Extract the half. */
44771 tmp = gen_reg_rtx (half_mode);
44772 emit_insn (gen_extract[j][i] (tmp, target));
44774 /* Put val in tmp at elt. */
44775 ix86_expand_vector_set (false, tmp, val, elt);
/* Re-insert the modified half back into TARGET.  */
44778 emit_insn (gen_insert[j][i] (target, target, tmp));
/* 512-bit modes: broadcast VAL and blend it in under a one-hot mask
   selecting lane ELT.  */
44782 if (TARGET_AVX512F)
44784 tmp = gen_reg_rtx (mode);
44785 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44786 gen_rtx_VEC_DUPLICATE (mode, val)));
44787 emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
44788 force_reg (QImode, GEN_INT (1 << elt))));
44794 if (TARGET_AVX512F)
44796 tmp = gen_reg_rtx (mode);
44797 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44798 gen_rtx_VEC_DUPLICATE (mode, val)));
44799 emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
44800 force_reg (QImode, GEN_INT (1 << elt))));
44806 if (TARGET_AVX512F)
44808 tmp = gen_reg_rtx (mode);
44809 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44810 gen_rtx_VEC_DUPLICATE (mode, val)));
44811 emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
44812 force_reg (HImode, GEN_INT (1 << elt))));
44818 if (TARGET_AVX512F)
44820 tmp = gen_reg_rtx (mode);
44821 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44822 gen_rtx_VEC_DUPLICATE (mode, val)));
44823 emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
44824 force_reg (HImode, GEN_INT (1 << elt))));
44830 if (TARGET_AVX512F && TARGET_AVX512BW)
44832 tmp = gen_reg_rtx (mode);
44833 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44834 gen_rtx_VEC_DUPLICATE (mode, val)));
44835 emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
44836 force_reg (SImode, GEN_INT (1 << elt))));
44842 if (TARGET_AVX512F && TARGET_AVX512BW)
44844 tmp = gen_reg_rtx (mode);
44845 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44846 gen_rtx_VEC_DUPLICATE (mode, val)));
44847 emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
44848 force_reg (DImode, GEN_INT (1 << elt))));
/* vec_merge path: duplicate VAL across the vector and merge lane ELT
   into TARGET under a one-hot mask.  */
44860 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
44861 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
44862 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last resort: round-trip through a stack temporary and store the
   element at its byte offset.  */
44866 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
44868 emit_move_insn (mem, target);
44870 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
44871 emit_move_insn (tmp, val);
44873 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET.
   MMX_OK permits the use of MMX instructions.  */
44878 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
44880 machine_mode mode = GET_MODE (vec);
44881 machine_mode inner_mode = GET_MODE_INNER (mode);
44882 bool use_vec_extr = false;
44895 use_vec_extr = true;
44899 use_vec_extr = TARGET_SSE4_1;
/* V4SF without SSE4.1: shuffle the wanted lane down to lane 0,
   then extract lane 0.  */
44911 tmp = gen_reg_rtx (mode);
44912 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
44913 GEN_INT (elt), GEN_INT (elt),
44914 GEN_INT (elt+4), GEN_INT (elt+4)));
44918 tmp = gen_reg_rtx (mode);
44919 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
44923 gcc_unreachable ();
44926 use_vec_extr = true;
44931 use_vec_extr = TARGET_SSE4_1;
/* V4SI: broadcast the wanted lane with pshufd, then extract lane 0.  */
44945 tmp = gen_reg_rtx (mode);
44946 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
44947 GEN_INT (elt), GEN_INT (elt),
44948 GEN_INT (elt), GEN_INT (elt)));
44952 tmp = gen_reg_rtx (mode);
44953 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
44957 gcc_unreachable ();
44960 use_vec_extr = true;
44965 /* For SSE1, we have to reuse the V4SF code. */
44966 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
44967 gen_lowpart (V4SFmode, vec), elt);
44973 use_vec_extr = TARGET_SSE2;
44976 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44980 use_vec_extr = TARGET_SSE4_1;
/* 256-bit modes: extract the relevant 128-bit half and recurse with
   the element index masked down to that half.  */
44986 tmp = gen_reg_rtx (V4SFmode);
44988 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
44990 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
44991 ix86_expand_vector_extract (false, target, tmp, elt & 3);
44999 tmp = gen_reg_rtx (V2DFmode);
45001 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
45003 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
45004 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45012 tmp = gen_reg_rtx (V16QImode);
45014 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
45016 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
45017 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45025 tmp = gen_reg_rtx (V8HImode);
45027 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
45029 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
45030 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45038 tmp = gen_reg_rtx (V4SImode);
45040 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
45042 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
45043 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45051 tmp = gen_reg_rtx (V2DImode);
45053 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
45055 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
45056 ix86_expand_vector_extract (false, target, tmp, elt & 1);
/* 512-bit modes: same half-extract-and-recurse scheme, gated on the
   required AVX-512 features for byte/word element sizes.  */
45062 if (TARGET_AVX512BW)
45064 tmp = gen_reg_rtx (V16HImode);
45066 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
45068 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
45069 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45075 if (TARGET_AVX512BW)
45077 tmp = gen_reg_rtx (V32QImode);
45079 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
45081 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
45082 ix86_expand_vector_extract (false, target, tmp, elt & 31);
45088 tmp = gen_reg_rtx (V8SFmode);
45090 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
45092 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
45093 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45097 tmp = gen_reg_rtx (V4DFmode);
45099 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
45101 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
45102 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45106 tmp = gen_reg_rtx (V8SImode);
45108 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
45110 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
45111 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45115 tmp = gen_reg_rtx (V4DImode);
45117 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
45119 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
45120 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45124 /* ??? Could extract the appropriate HImode element and shift. */
/* vec_extr path: emit an explicit VEC_SELECT of lane ELT.  */
45131 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
45132 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
45134 /* Let the rtl optimizers know about the zero extension performed. */
45135 if (inner_mode == QImode || inner_mode == HImode)
45137 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
45138 target = gen_lowpart (SImode, target)
45141 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last resort: spill the vector to a stack temporary and load the
   element from its byte offset.  */
45145 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45147 emit_move_insn (mem, vec);
45149 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45150 emit_move_insn (target, tmp);
45154 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
45155 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
45156 The upper bits of DEST are undefined, though they shouldn't cause
45157 exceptions (some bits from src or all zeros are ok). */
45160 emit_reduc_half (rtx dest, rtx src, int i)
45163 switch (GET_MODE (src))
/* 128-bit float modes: use movhlps/shufps (V4SF) or the high-part
   interleave (V2DF) to bring the upper half down.  */
45167 tem = gen_sse_movhlps (dest, src, src);
45169 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
45170 GEN_INT (1 + 4), GEN_INT (1 + 4));
45173 tem = gen_vec_interleave_highv2df (dest, src, src);
/* 128-bit integer modes: shift the whole register right via V1TI.  */
45179 d = gen_reg_rtx (V1TImode);
45180 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
/* 256-bit float modes: i selects whether to swap 128-bit lanes
   (vperm2f128) or shuffle within lanes.  */
45185 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
45187 tem = gen_avx_shufps256 (dest, src, src,
45188 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
45192 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
45194 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
/* 256-bit integer modes: permute or shift via V4DI/V2TI views.  */
45202 if (GET_MODE (dest) != V4DImode)
45203 d = gen_reg_rtx (V4DImode)
45204 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
45205 gen_lowpart (V4DImode, src),
45210 d = gen_reg_rtx (V2TImode);
45211 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
/* 512-bit modes: wide halves move via shuf_i32x4, narrower ones via
   the AVX-512 pshufd, both through a V16SI view.  */
45222 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
45223 gen_lowpart (V16SImode, src),
45224 gen_lowpart (V16SImode, src),
45225 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
45226 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
45227 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
45228 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
45229 GEN_INT (0xC), GEN_INT (0xD),
45230 GEN_INT (0xE), GEN_INT (0xF),
45231 GEN_INT (0x10), GEN_INT (0x11),
45232 GEN_INT (0x12), GEN_INT (0x13),
45233 GEN_INT (0x14), GEN_INT (0x15),
45234 GEN_INT (0x16), GEN_INT (0x17));
45236 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45237 gen_lowpart (V16SImode, src),
45238 GEN_INT (i == 128 ? 0x2 : 0x1),
45242 GEN_INT (i == 128 ? 0x6 : 0x5),
45246 GEN_INT (i == 128 ? 0xA : 0x9),
45250 GEN_INT (i == 128 ? 0xE : 0xD),
45256 gcc_unreachable ();
/* When the result landed in scratch register D, copy it to DEST
   through a lowpart view.  */
45260 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
45263 /* Expand a vector reduction. FN is the binary pattern to reduce;
45264 DEST is the destination; IN is the input vector. */
45267 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45269 rtx half, dst, vec = in;
45270 machine_mode mode = GET_MODE (in);
45273 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
45275 && mode == V8HImode
45276 && fn == gen_uminv8hi3
45278 emit_insn (gen_sse4_1_phminposuw (dest, in));
/* Log2-style reduction: repeatedly fold the upper half of the vector
   onto the lower half with FN, halving the active width each pass
   until a single element remains.  */
45282 for (i = GET_MODE_BITSIZE (mode);
45283 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
45286 half = gen_reg_rtx (mode);
45287 emit_reduc_half (half, vec, i);
/* On the final iteration, accumulate directly into DEST.  */
45288 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
45291 dst = gen_reg_rtx (mode);
45292 emit_insn (fn (dst, half, vec));
45297 /* Target hook for scalar_mode_supported_p. */
45299 ix86_scalar_mode_supported_p (machine_mode mode)
/* Decimal float support is decided by the generic default; TFmode is
   special-cased; everything else defers to the default hook.  */
45301 if (DECIMAL_FLOAT_MODE_P (mode))
45302 return default_decimal_float_supported_p ();
45303 else if (mode == TFmode)
45306 return default_scalar_mode_supported_p (mode);
45309 /* Implements target hook vector_mode_supported_p. */
45311 ix86_vector_mode_supported_p (machine_mode mode)
/* A vector mode is supported when the matching ISA extension is
   enabled, checked from narrowest SSE up through AVX-512 and MMX.  */
45313 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45315 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45317 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45319 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45321 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45323 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
45328 /* Implement target hook libgcc_floating_mode_supported_p. */
45330 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
/* TFmode availability in libgcc is configuration-dependent, selected
   by the IX86_*_LIBGCC_TFMODE macros.  */
45340 #ifdef IX86_NO_LIBGCC_TFMODE
45342 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45343 return TARGET_LONG_DOUBLE_128;
45353 /* Target hook for c_mode_for_suffix. */
45354 static machine_mode
/* Map a C constant suffix character to a machine mode.  */
45355 ix86_c_mode_for_suffix (char suffix)
45365 /* Worker function for TARGET_MD_ASM_CLOBBERS.
45367 We do this in the new i386 backend to maintain source compatibility
45368 with the old cc0-based compiler. */
45371 ix86_md_asm_clobbers (tree, tree, tree clobbers)
/* Every asm implicitly clobbers the condition-code flags and the x87
   status word; prepend them to the caller's clobber list.  */
45373 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
45375 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
45380 /* Implements target vector targetm.asm.encode_section_info. */
45382 static void ATTRIBUTE_UNUSED
45383 ix86_encode_section_info (tree decl, rtx rtl, int first)
45385 default_encode_section_info (decl, rtl, first);
/* Symbols placed in the large data section need far (64-bit)
   addressing; record that on the SYMBOL_REF.  */
45387 if (ix86_in_large_data_p (decl))
45388 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
45391 /* Worker function for REVERSE_CONDITION. */
45394 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
/* FP compare modes must reverse with unordered-awareness, since
   reversing an FP comparison can change NaN behavior.  */
45396 return (mode != CCFPmode && mode != CCFPUmode
45397 ? reverse_condition (code)
45398 : reverse_condition_maybe_unordered (code));
45401 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* Returns the assembler template string for the move; the choice of
   popping (fstp) vs non-popping (fst/fld) forms depends on whether
   the source register dies in this insn.  */
45405 output_387_reg_move (rtx insn, rtx *operands)
45407 if (REG_P (operands[0]))
45409 if (REG_P (operands[1])
45410 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
/* Source dies: a store-with-pop (or ffreep when the destination is
   already %st(0)) avoids leaving a stale stack slot.  */
45412 if (REGNO (operands[0]) == FIRST_STACK_REG)
45413 return output_387_ffreep (operands, 0);
45414 return "fstp\t%y0";
45416 if (STACK_TOP_P (operands[0]))
45417 return "fld%Z1\t%y1";
45420 else if (MEM_P (operands[0]))
45422 gcc_assert (REG_P (operands[1]));
45423 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45424 return "fstp%Z0\t%y0";
45427 /* There is no non-popping store to memory for XFmode.
45428 So if we need one, follow the store with a load. */
45429 if (GET_MODE (operands[0]) == XFmode)
45430 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45432 return "fst%Z0\t%y0";
45439 /* Output code to perform a conditional jump to LABEL, if C2 flag in
45440 FP status register is set. */
45443 ix86_emit_fp_unordered_jump (rtx label)
45445 rtx reg = gen_reg_rtx (HImode)
/* Read the x87 status word into REG.  */
45448 emit_insn (gen_x86_fnstsw_1 (reg));
45450 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
/* sahf copies the status word into EFLAGS; test UNORDERED there.  */
45452 emit_insn (gen_x86_sahf_1 (reg));
45454 temp = gen_rtx_REG (CCmode, FLAGS_REG);
45455 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
/* Without sahf: test bit 2 (C2, mask 0x04) of the status word
   directly and branch on non-zero.  */
45459 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
45461 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
45462 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
45465 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
45466 gen_rtx_LABEL_REF (VOIDmode, label),
45468 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
45470 emit_jump_insn (temp);
/* Unordered operands are expected to be rare: predict not-taken.  */
45471 predict_jump (REG_BR_PROB_BASE * 10 / 100);
45474 /* Output code to perform a log1p XFmode calculation. */
45476 void ix86_emit_i387_log1p (rtx op0, rtx op1)
45478 rtx_code_label *label1 = gen_label_rtx ();
45479 rtx_code_label *label2 = gen_label_rtx ();
45481 rtx tmp = gen_reg_rtx (XFmode);
45482 rtx tmp2 = gen_reg_rtx (XFmode);
/* fyl2xp1 is only accurate for |x| below sqrt(2)/2 - ... (the
   constant below, ~0.2929); larger inputs go through fyl2x on 1+x.  */
45485 emit_insn (gen_absxf2 (tmp, op1));
45486 test = gen_rtx_GE (VOIDmode, tmp,
45487 CONST_DOUBLE_FROM_REAL_VALUE (
45488 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
45490 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
/* Small |x| path: op0 = log2(1+x) * ln(2) via fyl2xp1.  */
45492 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45493 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
45494 emit_jump (label2);
/* Large |x| path: op0 = log2(1+x) * ln(2) via explicit 1+x and fyl2x.  */
45496 emit_label (label1);
45497 emit_move_insn (tmp, CONST1_RTX (XFmode));
45498 emit_insn (gen_addxf3 (tmp, op1, tmp));
45499 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45500 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
45502 emit_label (label2);
45505 /* Emit code for round calculation. */
45506 void ix86_emit_i387_round (rtx op0, rtx op1)
45508 machine_mode inmode = GET_MODE (op1);
45509 machine_mode outmode = GET_MODE (op0);
45510 rtx e1, e2, res, tmp, tmp1, half;
45511 rtx scratch = gen_reg_rtx (HImode);
45512 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
45513 rtx_code_label *jump_label = gen_label_rtx ();
45515 rtx (*gen_abs) (rtx, rtx);
45516 rtx (*gen_neg) (rtx, rtx);
/* Pick the abs generator matching the FP input mode.  */
45521 gen_abs = gen_abssf2;
45524 gen_abs = gen_absdf2;
45527 gen_abs = gen_absxf2;
45530 gcc_unreachable ();
/* Pick the negate generator matching the (FP or integer) output mode.  */
45536 gen_neg = gen_negsf2;
45539 gen_neg = gen_negdf2;
45542 gen_neg = gen_negxf2;
45545 gen_neg = gen_neghi2;
45548 gen_neg = gen_negsi2;
45551 gen_neg = gen_negdi2;
45554 gcc_unreachable ();
45557 e1 = gen_reg_rtx (inmode);
45558 e2 = gen_reg_rtx (inmode);
45559 res = gen_reg_rtx (outmode);
45561 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
45563 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
45565 /* scratch = fxam(op1) */
45566 emit_insn (gen_rtx_SET (VOIDmode, scratch,
45567 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
45569 /* e1 = fabs(op1) */
45570 emit_insn (gen_abs (e1, op1));
45572 /* e2 = e1 + 0.5 */
45573 half = force_reg (inmode, half);
45574 emit_insn (gen_rtx_SET (VOIDmode, e2,
45575 gen_rtx_PLUS (inmode, e1, half)));
45577 /* res = floor(e2) */
/* floor is done in XFmode; extend narrower inputs first.  */
45578 if (inmode != XFmode)
45580 tmp1 = gen_reg_rtx (XFmode);
45582 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
45583 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
/* FP output narrower than XFmode: round in XF, then truncate with a
   no-op-truncation unspec.  */
45593 rtx tmp0 = gen_reg_rtx (XFmode);
45595 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
45597 emit_insn (gen_rtx_SET (VOIDmode, res,
45598 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
45599 UNSPEC_TRUNC_NOOP)));
45603 emit_insn (gen_frndintxf2_floor (res, tmp1));
/* Integer output modes: use the lfloor patterns directly.  */
45606 emit_insn (gen_lfloorxfhi2 (res, tmp1));
45609 emit_insn (gen_lfloorxfsi2 (res, tmp1));
45612 emit_insn (gen_lfloorxfdi2 (res, tmp1));
45615 gcc_unreachable ();
45618 /* flags = signbit(a) */
/* Bit 1 (mask 0x02) of the fxam result is the C1/sign bit.  */
45619 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
45621 /* if (flags) then res = -res */
45622 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
45623 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
45624 gen_rtx_LABEL_REF (VOIDmode, jump_label),
45626 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
45627 predict_jump (REG_BR_PROB_BASE * 50 / 100);
45628 JUMP_LABEL (insn) = jump_label;
45630 emit_insn (gen_neg (res, res));
45632 emit_label (jump_label);
45633 LABEL_NUSES (jump_label) = 1;
45635 emit_move_insn (op0, res);
45638 /* Output code to perform a Newton-Raphson approximation of a single precision
45639    floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
/* Expand RES = A / B in MODE using a hardware reciprocal estimate
   refined by one Newton-Raphson step:
     a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp(b))).
   MODE may be scalar or vector; 512-bit modes use the AVX-512 rcp14
   estimate (see the unspec chosen below).  Accuracy is the estimate's
   precision plus one refinement step, not a correctly-rounded divide.  */
45641 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
45643   rtx x0, x1, e0, e1;
45645   x0 = gen_reg_rtx (mode);
45646   e0 = gen_reg_rtx (mode);
45647   e1 = gen_reg_rtx (mode);
45648   x1 = gen_reg_rtx (mode);
45650   /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
45652   b = force_reg (mode, b);
45654   /* x0 = rcp(b) estimate */
/* 512-bit vectors only have the RCP14 form of the estimate.  */
45655   if (mode == V16SFmode || mode == V8DFmode)
45656     emit_insn (gen_rtx_SET (VOIDmode, x0,
45657 			    gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45660     emit_insn (gen_rtx_SET (VOIDmode, x0,
45661 			    gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = x0 * b */
45665   emit_insn (gen_rtx_SET (VOIDmode, e0,
45666 			  gen_rtx_MULT (mode, x0, b)));
/* e0 = x0 * e0 */
45669   emit_insn (gen_rtx_SET (VOIDmode, e0,
45670 			  gen_rtx_MULT (mode, x0, e0)));
/* e1 = x0 + x0 */
45673   emit_insn (gen_rtx_SET (VOIDmode, e1,
45674 			  gen_rtx_PLUS (mode, x0, x0)));
/* x1 = e1 - e0, the refined reciprocal.  */
45677   emit_insn (gen_rtx_SET (VOIDmode, x1,
45678 			  gen_rtx_MINUS (mode, e1, e0)));
/* res = a * x1 */
45681   emit_insn (gen_rtx_SET (VOIDmode, res,
45682 			  gen_rtx_MULT (mode, a, x1)));
45685 /* Output code to perform a Newton-Raphson approximation of a
45686    single precision floating point [reciprocal] square root. */
/* Expand RES = sqrt(A) or rsqrt(A) (selected by a flag parameter whose
   declaration is elided from this listing) using the rsqrt estimate
   plus one Newton-Raphson step:
     sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
     rsqrt(a) = -0.5 *     rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
   For sqrt(0.0) the estimate (infinity) is masked to zero to avoid
   producing a NaN from 0 * inf.  512-bit vectors use RSQRT14 and the
   AVX-512 masked-compare/blend forms.  */
45688 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
45691   rtx x0, e0, e1, e2, e3, mthree, mhalf;
45695   x0 = gen_reg_rtx (mode);
45696   e0 = gen_reg_rtx (mode);
45697   e1 = gen_reg_rtx (mode);
45698   e2 = gen_reg_rtx (mode);
45699   e3 = gen_reg_rtx (mode);
/* Build the -3.0 and -0.5 constants used by the refinement step.  */
45701   real_from_integer (&r, VOIDmode, -3, SIGNED);
45702   mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45704   real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
45705   mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45706   unspec = UNSPEC_RSQRT;
45708   if (VECTOR_MODE_P (mode))
45710       mthree = ix86_build_const_vector (mode, true, mthree);
45711       mhalf = ix86_build_const_vector (mode, true, mhalf);
45712       /* There is no 512-bit rsqrt. There is however rsqrt14. */
45713       if (GET_MODE_SIZE (mode) == 64)
45714 	unspec = UNSPEC_RSQRT14;
45717   /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
45718      rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
45720   a = force_reg (mode, a);
45722   /* x0 = rsqrt(a) estimate */
45723   emit_insn (gen_rtx_SET (VOIDmode, x0,
45724 			  gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45727   /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
45732       zero = gen_reg_rtx (mode);
45733       mask = gen_reg_rtx (mode);
45735       zero = force_reg (mode, CONST0_RTX(mode));
45737       /* Handle masked compare. */
45738       if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
/* AVX-512 path: compare to a HImode predicate mask, then blend.  */
45740 	  mask = gen_reg_rtx (HImode);
45741 	  /* Imm value 0x4 corresponds to not-equal comparison. */
45742 	  emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
45743 	  emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
/* SSE/AVX path: all-ones/all-zeros compare mask ANDed into x0.  */
45747 	  emit_insn (gen_rtx_SET (VOIDmode, mask,
45748 				  gen_rtx_NE (mode, zero, a)));
45750 	  emit_insn (gen_rtx_SET (VOIDmode, x0,
45751 				  gen_rtx_AND (mode, x0, mask)));
/* e0 = x0 * a */
45756   emit_insn (gen_rtx_SET (VOIDmode, e0,
45757 			  gen_rtx_MULT (mode, x0, a)));
/* e1 = e0 * x0 */
45759   emit_insn (gen_rtx_SET (VOIDmode, e1,
45760 			  gen_rtx_MULT (mode, e0, x0)));
/* e2 = e1 - 3.0 (added as e1 + (-3.0)).  */
45763   mthree = force_reg (mode, mthree);
45764   emit_insn (gen_rtx_SET (VOIDmode, e2,
45765 			  gen_rtx_PLUS (mode, e1, mthree)));
45767   mhalf = force_reg (mode, mhalf);
/* The elided conditional picks x0 (rsqrt) or e0 (sqrt) for e3.  */
45769     /* e3 = -.5 * x0 */
45770     emit_insn (gen_rtx_SET (VOIDmode, e3,
45771 			    gen_rtx_MULT (mode, x0, mhalf)));
45773     /* e3 = -.5 * e0 */
45774     emit_insn (gen_rtx_SET (VOIDmode, e3,
45775 			    gen_rtx_MULT (mode, e0, mhalf)));
45776   /* ret = e2 * e3 */
45777   emit_insn (gen_rtx_SET (VOIDmode, res,
45778 			  gen_rtx_MULT (mode, e2, e3)));
45781 #ifdef TARGET_SOLARIS
45782 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
/* Emit a .section directive for NAME/FLAGS.  Special-cases .eh_frame
   (which needs the "@unwind" marker for old Binutils), delegates
   linkonce sections to the Solaris comdat handler, and otherwise falls
   back to the generic ELF implementation.  NOTE(review): the function
   header line and the DECL parameter are elided from this listing.  */
45785 i386_solaris_elf_named_section (const char *name, unsigned int flags,
45788   /* With Binutils 2.15, the "@unwind" marker must be specified on
45789      every occurrence of the ".eh_frame" section, not just the first
45792       && strcmp (name, ".eh_frame") == 0)
45794       fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
45795 	       flags & SECTION_WRITE ? "aw" : "a");
45800   if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
45802       solaris_elf_asm_comdat_section (name, flags, decl);
45807   default_elf_asm_named_section (name, flags, decl);
45809 #endif /* TARGET_SOLARIS */
45811 /* Return the mangling of TYPE if it is an extended fundamental type. */
/* Implements TARGET_MANGLE_TYPE: returns "g" for __float128 and "e"
   for 80-bit long double/__float80; NULL (elided below) for anything
   the generic mangler should handle.  Only void/bool/integer/real
   main-variant types are considered.  */
45813 static const char *
45814 ix86_mangle_type (const_tree type)
45816   type = TYPE_MAIN_VARIANT (type);
45818   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
45819       && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
45822   switch (TYPE_MODE (type))
45825       /* __float128 is "g". */
45828       /* "long double" or __float80 is "e". */
45835 /* For 32-bit code we can save PIC register setup by using
45836    __stack_chk_fail_local hidden function instead of calling
45837    __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
45838    register, so it is better to call __stack_chk_fail directly. */
/* Implements TARGET_STACK_PROTECT_FAIL: returns the call expression to
   emit when a stack-smashing check fails.  */
45840 static tree ATTRIBUTE_UNUSED
45841 ix86_stack_protect_fail (void)
45843   return TARGET_64BIT
45844 	 ? default_external_stack_protect_fail ()
45845 	 : default_hidden_stack_protect_fail ();
45848 /* Select a format to encode pointers in exception handling data.  CODE
45849    is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
45850    true if the symbol may be affected by dynamic relocations.
45852    ??? All x86 object file formats are capable of representing this.
45853    After all, the relocation needed is the same as for the call insn.
45854    Whether or not a particular assembler allows us to enter such, I
45855    guess we'll have to see. */
/* The elided branch structure: PIC code gets pc-relative sdata
   (4- or 8-byte depending on code model, indirect for global symbols);
   non-PIC small/medium models get udata4; everything else absolute.
   NOTE(review): the return type line and PIC condition are elided.  */
45857 asm_preferred_eh_data_format (int code, int global)
45861       int type = DW_EH_PE_sdata8;
45863 	  || ix86_cmodel == CM_SMALL_PIC
45864 	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
45865 	type = DW_EH_PE_sdata4;
45866       return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
45868   if (ix86_cmodel == CM_SMALL
45869       || (ix86_cmodel == CM_MEDIUM && code))
45870     return DW_EH_PE_udata4;
45871   return DW_EH_PE_absptr;
45874 /* Expand copysign from SIGN to the positive value ABS_VALUE
45875    storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
/* the sign bit (continuation elided).  RESULT = ABS_VALUE | (SIGN & ~MASK),
   i.e. the sign bit of SIGN is transplanted onto the known-positive
   ABS_VALUE.  When no mask is supplied a sign-bit mask is built in the
   corresponding vector mode and, for scalar modes, narrowed back with a
   vec_select.  */
45878 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
45880   machine_mode mode = GET_MODE (sign);
45881   rtx sgn = gen_reg_rtx (mode);
45882   if (mask == NULL_RTX)
45884       machine_mode vmode;
45886       if (mode == SFmode)
45888       else if (mode == DFmode)
/* Build an inverted sign-bit mask (false => bits clear in sign slot).  */
45893       mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
45894       if (!VECTOR_MODE_P (mode))
45896 	  /* We need to generate a scalar mode mask in this case. */
45897 	  rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45898 	  tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45899 	  mask = gen_reg_rtx (mode);
45900 	  emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* Caller-provided masks select the magnitude bits, so invert here.  */
45904     mask = gen_rtx_NOT (mode, mask);
45905   emit_insn (gen_rtx_SET (VOIDmode, sgn,
45906 			  gen_rtx_AND (mode, mask, sign)));
45907   emit_insn (gen_rtx_SET (VOIDmode, result,
45908 			  gen_rtx_IOR (mode, abs_value, sgn)));
45911 /* Expand fabs (OP0) and return a new rtx that holds the result.  The
45912    mask for masking out the sign-bit is stored in *SMASK, if that is
/* non-null (continuation elided).  Computes XA = OP0 & ~signbit via an
   AND with a sign-bit-clearing constant; the mask is built in vector
   mode and narrowed with a vec_select for scalar modes.  The return
   statement is elided from this listing.  */
45915 ix86_expand_sse_fabs (rtx op0, rtx *smask)
45917   machine_mode vmode, mode = GET_MODE (op0);
45920   xa = gen_reg_rtx (mode);
45921   if (mode == SFmode)
45923   else if (mode == DFmode)
/* true => mask has the sign bit clear, magnitude bits set.  */
45927   mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
45928   if (!VECTOR_MODE_P (mode))
45930       /* We need to generate a scalar mode mask in this case. */
45931       rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45932       tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45933       mask = gen_reg_rtx (mode);
45934       emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
45936   emit_insn (gen_rtx_SET (VOIDmode, xa,
45937 			  gen_rtx_AND (mode, op0, mask)));
45945 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
45946    swapping the operands if SWAP_OPERANDS is true.  The expanded
45947    code is a forward jump to a newly created label in case the
45948    comparison is true.  The generated label rtx is returned. */
/* Emits an FP compare into the flags register followed by a
   conditional branch; the caller emits the label (and sets
   LABEL_NUSES) at the join point.  The return statement is elided
   from this listing.  */
45949 static rtx_code_label *
45950 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
45951 				  bool swap_operands)
45953   machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
45954   rtx_code_label *label;
45958     std::swap (op0, op1);
45960   label = gen_label_rtx ();
45961   tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
45962   emit_insn (gen_rtx_SET (VOIDmode, tmp,
45963 			  gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
45964   tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
45965   tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
45966 			      gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
45967   tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
45968   JUMP_LABEL (tmp) = label;
45973 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
45974    using comparison code CODE.  Operands are swapped for the comparison if
45975    SWAP_OPERANDS is true.  Returns a rtx for the generated mask. */
/* Uses the scalar cmpss/cmpsd setcc patterns, producing an
   all-ones/all-zeros mask register in MODE (SF or DF).  The return
   statement is elided from this listing.  */
45977 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
45978 			      bool swap_operands)
45980   rtx (*insn)(rtx, rtx, rtx, rtx);
45981   machine_mode mode = GET_MODE (op0);
45982   rtx mask = gen_reg_rtx (mode);
45985     std::swap (op0, op1);
45987   insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
45989   emit_insn (insn (mask, op0, op1,
45990 		   gen_rtx_fmt_ee (code, mode, op0, op1)));
45994 /* Generate and return a rtx of mode MODE for 2**n where n is the number
45995    of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
/* 2^52 (DF) / 2^23 (SF) is the magnitude beyond which every value is
   already an integer; adding-then-subtracting it rounds to integer in
   the current rounding mode.  Result is forced into a register.  */
45997 ix86_gen_TWO52 (machine_mode mode)
45999   REAL_VALUE_TYPE TWO52r;
46002   real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
46003   TWO52 = const_double_from_real_value (TWO52r, mode);
46004   TWO52 = force_reg (mode, TWO52);
46009 /* Expand SSE sequence for computing lround from OP1 storing
/* into OP0 (continuation elided).  Adds copysign(nextafter(0.5, 0.0), op1)
   to op1 and converts to integer; using the predecessor of 0.5 avoids
   rounding exact halfway cases the wrong way when the addition itself
   rounds up.  */
46012 ix86_expand_lround (rtx op0, rtx op1)
46014   /* C code for the stuff we're doing below:
46015        tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
46018   machine_mode mode = GET_MODE (op1);
46019   const struct real_format *fmt;
46020   REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46023   /* load nextafter (0.5, 0.0) */
46024   fmt = REAL_MODE_FORMAT (mode);
46025   real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46026   REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46028   /* adj = copysign (0.5, op1) */
46029   adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
46030   ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
46032   /* adj = op1 + adj */
46033   adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
46035   /* op0 = (imode)adj */
46036   expand_fix (op0, adj, 0);
46039 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
/* into OPERAND0 (continuation elided).  Despite the comment above this
   computes lfloor/lceil: truncate to integer, convert back, then adjust
   by 1 when the round-trip moved past the value in the wrong direction.
   DO_FLOOR selects floor (subtract on freg > op1) vs. ceil.  */
46042 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
46044   /* C code for the stuff we're doing below (for do_floor):
46046 	xi -= (double)xi > op1 ? 1 : 0;
46049   machine_mode fmode = GET_MODE (op1);
46050   machine_mode imode = GET_MODE (op0);
46051   rtx ireg, freg, tmp;
46052   rtx_code_label *label;
46054   /* reg = (long)op1 */
46055   ireg = gen_reg_rtx (imode);
46056   expand_fix (ireg, op1, 0);
46058   /* freg = (double)reg */
46059   freg = gen_reg_rtx (fmode);
46060   expand_float (freg, ireg, 0);
46062   /* ireg = (freg > op1) ? ireg - 1 : ireg */
/* UNLE with operand swap implements the (freg > op1) / (freg < op1)
   tests for floor/ceil respectively; the jump skips the adjustment.  */
46063   label = ix86_expand_sse_compare_and_jump (UNLE,
46064 					    freg, op1, !do_floor);
46065   tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
46066 			     ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
46067   emit_move_insn (ireg, tmp);
46069   emit_label (label);
46070   LABEL_NUSES (label) = 1;
46072   emit_move_insn (op0, ireg);
46075 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
46076    result in OPERAND0. */
/* Uses the classic +-2^52 trick: values with |x| >= 2^52 are already
   integral and skip the adjustment (via the compare-and-jump); smaller
   magnitudes get TWO52 added and subtracted, which rounds to integer in
   the current rounding mode, then the original sign is restored so
   -0.0 survives.  */
46078 ix86_expand_rint (rtx operand0, rtx operand1)
46080   /* C code for the stuff we're doing below:
46081 	xa = fabs (operand1);
46082 	if (!isless (xa, 2**52))
46084 	xa = xa + 2**52 - 2**52;
46085 	return copysign (xa, operand1);
46087   machine_mode mode = GET_MODE (operand0);
46088   rtx res, xa, TWO52, mask;
46089   rtx_code_label *label;
46091   res = gen_reg_rtx (mode);
46092   emit_move_insn (res, operand1);
46094   /* xa = abs (operand1) */
46095   xa = ix86_expand_sse_fabs (res, &mask);
46097   /* if (!isless (xa, TWO52)) goto label; */
46098   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46101   xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46102   xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46104   ix86_sse_copysign_to_positive (res, xa, res, mask);
46106   emit_label (label);
46107   LABEL_NUSES (label) = 1;
46109   emit_move_insn (operand0, res);
46112 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* into OPERAND0 (continuation elided).  32-bit-safe variant: instead of
   a DImode fix/float round trip it uses the +-TWO52 trick to round to
   nearest, then compensates by +-1.0 (selected by DO_FLOOR) when the
   rounding went the wrong way.  Always subtracts so signed zero is
   preserved.  */
46115 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
46117   /* C code for the stuff we expand below.
46118 	double xa = fabs (x), x2;
46119 	if (!isless (xa, TWO52))
46121 	xa = xa + TWO52 - TWO52;
46122 	x2 = copysign (xa, x);
46131   machine_mode mode = GET_MODE (operand0);
46132   rtx xa, TWO52, tmp, one, res, mask;
46133   rtx_code_label *label;
46135   TWO52 = ix86_gen_TWO52 (mode);
46137   /* Temporary for holding the result, initialized to the input
46138      operand to ease control flow.  */
46139   res = gen_reg_rtx (mode);
46140   emit_move_insn (res, operand1);
46142   /* xa = abs (operand1) */
46143   xa = ix86_expand_sse_fabs (res, &mask);
46145   /* if (!isless (xa, TWO52)) goto label; */
46146   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46148   /* xa = xa + TWO52 - TWO52; */
46149   xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46150   xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46152   /* xa = copysign (xa, operand1) */
46153   ix86_sse_copysign_to_positive (xa, xa, res, mask);
46155   /* generate 1.0 or -1.0 */
/* For ceil the correction is -1.0 so the unconditional MINUS below
   still adjusts in the right direction.  */
46156   one = force_reg (mode,
46157 		   const_double_from_real_value (do_floor
46158 						 ? dconst1 : dconstm1, mode));
46160   /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46161   tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46162   emit_insn (gen_rtx_SET (VOIDmode, tmp,
46163 			  gen_rtx_AND (mode, one, tmp)));
46164   /* We always need to subtract here to preserve signed zero.  */
46165   tmp = expand_simple_binop (mode, MINUS,
46166 			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46167   emit_move_insn (res, tmp);
46169   emit_label (label);
46170   LABEL_NUSES (label) = 1;
46172   emit_move_insn (operand0, res);
46175 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* into OPERAND0 (continuation elided).  64-bit variant: truncates via a
   DImode/SImode fix/float round trip, then adjusts by 1.0 in the
   DO_FLOOR direction when truncation moved past the value; restores the
   sign afterwards when -0.0 must be honored.  */
46178 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
46180   /* C code for the stuff we expand below.
46181 	double xa = fabs (x), x2;
46182 	if (!isless (xa, TWO52))
46184 	x2 = (double)(long)x;
46191 	if (HONOR_SIGNED_ZEROS (mode))
46192 	  return copysign (x2, x);
46195   machine_mode mode = GET_MODE (operand0);
46196   rtx xa, xi, TWO52, tmp, one, res, mask;
46197   rtx_code_label *label;
46199   TWO52 = ix86_gen_TWO52 (mode);
46201   /* Temporary for holding the result, initialized to the input
46202      operand to ease control flow.  */
46203   res = gen_reg_rtx (mode);
46204   emit_move_insn (res, operand1);
46206   /* xa = abs (operand1) */
46207   xa = ix86_expand_sse_fabs (res, &mask);
46209   /* if (!isless (xa, TWO52)) goto label; */
46210   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46212   /* xa = (double)(long)x */
46213   xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46214   expand_fix (xi, res, 0);
46215   expand_float (xa, xi, 0);
46218   one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46220   /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46221   tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46222   emit_insn (gen_rtx_SET (VOIDmode, tmp,
46223 			  gen_rtx_AND (mode, one, tmp)));
46224   tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
46225 			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46226   emit_move_insn (res, tmp);
46228   if (HONOR_SIGNED_ZEROS (mode))
46229     ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46231   emit_label (label);
46232   LABEL_NUSES (label) = 1;
46234   emit_move_insn (operand0, res);
46237 /* Expand SSE sequence for computing round from OPERAND1 storing
46238    into OPERAND0.  Sequence that works without relying on DImode truncation
46239    via cvttsd2siq that is only available on 64bit targets. */
/* Rounds-half-away-from-zero on the absolute value: xa2 = rint(xa) via
   the +-TWO52 trick, dxa = xa2 - xa is the rounding error, then xa2 is
   corrected by +-1 when |error| crosses 0.5 (implementing round rather
   than rint's round-to-even).  The sign is copied back at the end.  */
46241 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46243   /* C code for the stuff we expand below.
46244 	double xa = fabs (x), xa2, x2;
46245 	if (!isless (xa, TWO52))
46247 	Using the absolute value and copying back sign makes
46248 	-0.0 -> -0.0 correct.
46249 	xa2 = xa + TWO52 - TWO52;
46254 	else if (dxa > 0.5)
46256 	x2 = copysign (xa2, x);
46259   machine_mode mode = GET_MODE (operand0);
46260   rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46261   rtx_code_label *label;
46263   TWO52 = ix86_gen_TWO52 (mode);
46265   /* Temporary for holding the result, initialized to the input
46266      operand to ease control flow.  */
46267   res = gen_reg_rtx (mode);
46268   emit_move_insn (res, operand1);
46270   /* xa = abs (operand1) */
46271   xa = ix86_expand_sse_fabs (res, &mask);
46273   /* if (!isless (xa, TWO52)) goto label; */
46274   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46276   /* xa2 = xa + TWO52 - TWO52; */
46277   xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46278   xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
46280   /* dxa = xa2 - xa; */
46281   dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
46283   /* generate 0.5, 1.0 and -0.5 */
/* Constants derived arithmetically (half+half, half-one) to avoid
   extra constant-pool loads.  */
46284   half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46285   one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46286   mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46290   tmp = gen_reg_rtx (mode);
46291   /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46292   tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46293   emit_insn (gen_rtx_SET (VOIDmode, tmp,
46294 			  gen_rtx_AND (mode, one, tmp)));
46295   xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46296   /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46297   tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46298   emit_insn (gen_rtx_SET (VOIDmode, tmp,
46299 			  gen_rtx_AND (mode, one, tmp)));
46300   xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46302   /* res = copysign (xa2, operand1) */
46303   ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46305   emit_label (label);
46306   LABEL_NUSES (label) = 1;
46308   emit_move_insn (operand0, res);
46311 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* into OPERAND0 (continuation elided).  Truncates by a fix/float round
   trip (toward zero by definition of the conversion); values with
   |x| >= TWO52 are already integral and skip it.  The sign is restored
   afterwards when -0.0 must be honored.  */
46314 ix86_expand_trunc (rtx operand0, rtx operand1)
46316   /* C code for SSE variant we expand below.
46317 	double xa = fabs (x), x2;
46318 	if (!isless (xa, TWO52))
46320 	x2 = (double)(long)x;
46321 	if (HONOR_SIGNED_ZEROS (mode))
46322 	  return copysign (x2, x);
46325   machine_mode mode = GET_MODE (operand0);
46326   rtx xa, xi, TWO52, res, mask;
46327   rtx_code_label *label;
46329   TWO52 = ix86_gen_TWO52 (mode);
46331   /* Temporary for holding the result, initialized to the input
46332      operand to ease control flow.  */
46333   res = gen_reg_rtx (mode);
46334   emit_move_insn (res, operand1);
46336   /* xa = abs (operand1) */
46337   xa = ix86_expand_sse_fabs (res, &mask);
46339   /* if (!isless (xa, TWO52)) goto label; */
46340   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46342   /* x = (double)(long)x */
46343   xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46344   expand_fix (xi, res, 0);
46345   expand_float (res, xi, 0);
46347   if (HONOR_SIGNED_ZEROS (mode))
46348     ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46350   emit_label (label);
46351   LABEL_NUSES (label) = 1;
46353   emit_move_insn (operand0, res);
46356 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* into OPERAND0 (continuation elided).  32-bit-safe variant: instead of
   a DImode fix/float round trip it rounds |x| to nearest with the
   +-TWO52 trick, subtracts 1.0 when rounding overshot (res > xa), and
   copies the original sign back -- yielding truncation toward zero.  */
46359 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46361   machine_mode mode = GET_MODE (operand0);
46362   rtx xa, mask, TWO52, one, res, smask, tmp;
46363   rtx_code_label *label;
46365   /* C code for SSE variant we expand below.
46366 	double xa = fabs (x), x2;
46367 	if (!isless (xa, TWO52))
46369 	xa2 = xa + TWO52 - TWO52;
46373 	x2 = copysign (xa2, x);
46377   TWO52 = ix86_gen_TWO52 (mode);
46379   /* Temporary for holding the result, initialized to the input
46380      operand to ease control flow.  */
46381   res = gen_reg_rtx (mode);
46382   emit_move_insn (res, operand1);
46384   /* xa = abs (operand1) */
46385   xa = ix86_expand_sse_fabs (res, &smask);
46387   /* if (!isless (xa, TWO52)) goto label; */
46388   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46390   /* res = xa + TWO52 - TWO52; */
46391   tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46392   tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46393   emit_move_insn (res, tmp);
46396   one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46398   /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
46399   mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46400   emit_insn (gen_rtx_SET (VOIDmode, mask,
46401 			  gen_rtx_AND (mode, mask, one)));
46402   tmp = expand_simple_binop (mode, MINUS,
46403 			     res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46404   emit_move_insn (res, tmp);
46406   /* res = copysign (res, operand1) */
46407   ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46409   emit_label (label);
46410   LABEL_NUSES (label) = 1;
46412   emit_move_insn (operand0, res);
46415 /* Expand SSE sequence for computing round from OPERAND1 storing
/* into OPERAND0 (continuation elided).  Adds nextafter(0.5, 0.0) to |x|
   and truncates via fix/float; the predecessor of 0.5 prevents exact
   halfway cases from being pushed past the next integer by the
   addition's own rounding.  Sign is copied back at the end.  */
46418 ix86_expand_round (rtx operand0, rtx operand1)
46420   /* C code for the stuff we're doing below:
46421 	double xa = fabs (x);
46422 	if (!isless (xa, TWO52))
46424 	xa = (double)(long)(xa + nextafter (0.5, 0.0));
46425 	return copysign (xa, x);
46427   machine_mode mode = GET_MODE (operand0);
46428   rtx res, TWO52, xa, xi, half, mask;
46429   rtx_code_label *label;
46430   const struct real_format *fmt;
46431   REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46433   /* Temporary for holding the result, initialized to the input
46434      operand to ease control flow.  */
46435   res = gen_reg_rtx (mode);
46436   emit_move_insn (res, operand1);
46438   TWO52 = ix86_gen_TWO52 (mode);
46439   xa = ix86_expand_sse_fabs (res, &mask);
46440   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46442   /* load nextafter (0.5, 0.0) */
/* pred_half = 0.5 - 2^(-p-1), the largest value below 0.5 in MODE.  */
46443   fmt = REAL_MODE_FORMAT (mode);
46444   real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46445   REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46447   /* xa = xa + 0.5 */
46448   half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46449   xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46451   /* xa = (double)(int64_t)xa */
46452   xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46453   expand_fix (xi, xa, 0);
46454   expand_float (xa, xi, 0);
46456   /* res = copysign (xa, operand1) */
46457   ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46459   emit_label (label);
46460   LABEL_NUSES (label) = 1;
46462   emit_move_insn (operand0, res);
46465 /* Expand SSE sequence for computing round
46466    from OP1 storing into OP0 using sse4 round insn. */
/* round(a) = roundsd/roundss-trunc(a + copysign(pred(0.5), a)): the
   SSE4.1 ROUND instruction does the truncation, and the predecessor of
   0.5 keeps exact halfway cases from overshooting.  Mode must be SF or
   DF (the selecting switch's case labels are elided below).  */
46468 ix86_expand_round_sse4 (rtx op0, rtx op1)
46470   machine_mode mode = GET_MODE (op0);
46471   rtx e1, e2, res, half;
46472   const struct real_format *fmt;
46473   REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46474   rtx (*gen_copysign) (rtx, rtx, rtx);
46475   rtx (*gen_round) (rtx, rtx, rtx);
46480       gen_copysign = gen_copysignsf3;
46481       gen_round = gen_sse4_1_roundsf2;
46484       gen_copysign = gen_copysigndf3;
46485       gen_round = gen_sse4_1_rounddf2;
46488       gcc_unreachable ();
46491   /* round (a) = trunc (a + copysign (0.5, a)) */
46493   /* load nextafter (0.5, 0.0) */
46494   fmt = REAL_MODE_FORMAT (mode);
46495   real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46496   REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46497   half = const_double_from_real_value (pred_half, mode);
46499   /* e1 = copysign (0.5, op1) */
46500   e1 = gen_reg_rtx (mode);
46501   emit_insn (gen_copysign (e1, half, op1));
46503   /* e2 = op1 + e1 */
46504   e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46506   /* res = trunc (e2) */
46507   res = gen_reg_rtx (mode);
46508   emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
46510   emit_move_insn (op0, res);
46514 /* Table of valid machine attributes. */
/* Implements TARGET_ATTRIBUTE_TABLE.  Each entry supplies the handler
   that validates the attribute when it is applied; calling-convention
   attributes share ix86_handle_cconv_attribute.  The trailing
   affects_type_identity field of several entries is elided in this
   listing.  NULL-name entry terminates the table.  */
46515 static const struct attribute_spec ix86_attribute_table[] =
46517   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
46518        affects_type_identity } */
46519   /* Stdcall attribute says callee is responsible for popping arguments
46520      if they are not variable.  */
46521   { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute,
46523   /* Fastcall attribute says callee is responsible for popping arguments
46524      if they are not variable.  */
46525   { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
46527   /* Thiscall attribute says callee is responsible for popping arguments
46528      if they are not variable.  */
46529   { "thiscall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
46531   /* Cdecl attribute says the callee is a normal C declaration */
46532   { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute,
46534   /* Regparm attribute specifies how many integer arguments are to be
46535      passed in registers.  */
46536   { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute,
46538   /* Sseregparm attribute says we are using x86_64 calling conventions
46539      for FP arguments.  */
46540   { "sseregparm", 0, 0, false, true, true,  ix86_handle_cconv_attribute,
46542   /* The transactional memory builtins are implicitly regparm or fastcall
46543      depending on the ABI.  Override the generic do-nothing attribute that
46544      these builtins were declared with.  */
46545   { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
46547   /* force_align_arg_pointer says this function realigns the stack at entry.  */
46548   { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
46549     false, true,  true, ix86_handle_cconv_attribute, false },
46550 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46551   { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
46552   { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
46553   { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute,
46556   { "ms_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute,
46558   { "gcc_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute,
46560 #ifdef SUBTARGET_ATTRIBUTE_TABLE
46561   SUBTARGET_ATTRIBUTE_TABLE,
46563   /* ms_abi and sysv_abi calling convention function attributes.  */
46564   { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46565   { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46566   { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
46568   { "callee_pop_aggregate_return", 1, 1, false, true, true,
46569     ix86_handle_callee_pop_aggregate_return, true },
46571   { NULL,        0, 0, false, false, false, NULL, false }
46574 /* Implement targetm.vectorize.builtin_vectorization_cost. */
/* Maps vectorizer cost-model categories to the per-CPU cost table
   (ix86_cost).  Note unaligned stores reuse vec_unalign_load_cost, and
   vec_construct is approximated as elements/2 + 1 rather than a table
   lookup.  The return type line and case labels for the first entries
   are elided from this listing.  */
46576 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
46581   switch (type_of_cost)
46584 	return ix86_cost->scalar_stmt_cost;
46587 	return ix86_cost->scalar_load_cost;
46590 	return ix86_cost->scalar_store_cost;
46593 	return ix86_cost->vec_stmt_cost;
46596 	return ix86_cost->vec_align_load_cost;
46599 	return ix86_cost->vec_store_cost;
46601       case vec_to_scalar:
46602 	return ix86_cost->vec_to_scalar_cost;
46604       case scalar_to_vec:
46605 	return ix86_cost->scalar_to_vec_cost;
46607       case unaligned_load:
46608       case unaligned_store:
46609 	return ix86_cost->vec_unalign_load_cost;
46611       case cond_branch_taken:
46612 	return ix86_cost->cond_taken_branch_cost;
46614       case cond_branch_not_taken:
46615 	return ix86_cost->cond_not_taken_branch_cost;
46618       case vec_promote_demote:
46619 	return ix86_cost->vec_stmt_cost;
46621       case vec_construct:
46622 	elements = TYPE_VECTOR_SUBPARTS (vectype);
46623 	return elements / 2 + 1;
46626 	gcc_unreachable ();
46630 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
46631    insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
46632    insn every time.  */
46634 static GTY(()) rtx_insn *vselect_insn;
46636 /* Initialize vselect_insn. */
/* Builds the template once: a MAX_VECT_LEN-slot PARALLEL of const0_rtx
   placeholders under a VEC_SELECT of a VEC_CONCAT.  The modes and
   operands are patched in by the users before recog and restored after.
   Emitted inside a start/end sequence (elided) so it never lands in the
   real insn stream.  */
46639 init_vselect_insn (void)
46644   x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
46645   for (i = 0; i < MAX_VECT_LEN; ++i)
46646     XVECEXP (x, 0, i) = const0_rtx;
46647   x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
46649   x = gen_rtx_SET (VOIDmode, const0_rtx, x);
46651   vselect_insn = emit_insn (x);
46655 /* Construct (set target (vec_select op0 (parallel perm))) and
46656    return true if that's a valid instruction in the active ISA.  */
/* Patches the cached vselect_insn in place (selector length, indices,
   source operand, modes, destination), asks recog whether the active
   ISA has a matching pattern, emits a copy of the pattern when valid
   and !TESTING_P, then restores the cached insn's placeholders.  The
   final return of the recog result is elided from this listing.  */
46659 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
46660 		unsigned nelt, bool testing_p)
46663   rtx x, save_vconcat;
46666   if (vselect_insn == NULL_RTX)
46667     init_vselect_insn ();
46669   x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
46670   PUT_NUM_ELEM (XVEC (x, 0), nelt);
46671   for (i = 0; i < nelt; ++i)
46672     XVECEXP (x, 0, i) = GEN_INT (perm[i]);
46673   save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46674   XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
46675   PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
46676   SET_DEST (PATTERN (vselect_insn)) = target;
46677   icode = recog_memoized (vselect_insn);
46679   if (icode >= 0 && !testing_p)
46680     emit_insn (copy_rtx (PATTERN (vselect_insn)));
/* Restore the shared template and force re-recognition next time.  */
46682   SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
46683   XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
46684   INSN_CODE (vselect_insn) = -1;
46689 /* Similar, but generate a vec_concat from op0 and op1 as well.  */
/* Installs OP0/OP1 into the cached VEC_CONCAT (set in elided lines),
   widens its mode to twice OP0's mode, delegates to expand_vselect,
   then resets the concat operands to the const0_rtx placeholders.  The
   return of the delegated result is elided from this listing.  */
46692 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
46693 			const unsigned char *perm, unsigned nelt,
46696   machine_mode v2mode;
46700   if (vselect_insn == NULL_RTX)
46701     init_vselect_insn ();
46703   v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
46704   x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46705   PUT_MODE (x, v2mode);
46708   ok = expand_vselect (target, x, perm, nelt, testing_p);
46709   XEXP (x, 0) = const0_rtx;
46710   XEXP (x, 1) = const0_rtx;
46714 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46715 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
/* NOTE(review): this excerpt is elided -- source lines are missing between
   the numbered lines; comments describe only the visible code.  */
46718 expand_vec_perm_blend (struct expand_vec_perm_d *d)
46720 machine_mode vmode = d->vmode;
46721 unsigned i, mask, nelt = d->nelt;
46722 rtx target, op0, op1, x;
46723 rtx rperm[32], vperm;
/* Blends need two distinct operands; reject one-operand permutations and
   modes/ISAs for which no blend instruction exists.  */
46725 if (d->one_operand_p)
46727 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
46728 && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
46730 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
46732 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
46734 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
46739 /* This is a blend, not a permute. Elements must stay in their
46740 respective lanes. */
46741 for (i = 0; i < nelt; ++i)
46743 unsigned e = d->perm[i];
46744 if (!(e == i || e == i + nelt))
46751 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
46752 decision should be extracted elsewhere, so that we only try that
46753 sequence once all budget==3 options have been tried. */
46754 target = d->target;
/* Build the immediate mask: bit i set when element i comes from op1.  */
46773 for (i = 0; i < nelt; ++i)
46774 mask |= (d->perm[i] >= nelt) << i;
46778 for (i = 0; i < 2; ++i)
46779 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
46784 for (i = 0; i < 4; ++i)
46785 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46790 /* See if bytes move in pairs so we can use pblendw with
46791 an immediate argument, rather than pblendvb with a vector
46793 for (i = 0; i < 16; i += 2)
46794 if (d->perm[i] + 1 != d->perm[i + 1])
/* pblendvb path: build a constant mask vector of 0 / -1 selectors.  */
46797 for (i = 0; i < nelt; ++i)
46798 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
46801 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
46802 vperm = force_reg (vmode, vperm);
46804 if (GET_MODE_SIZE (vmode) == 16)
46805 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
46807 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
46808 if (target != d->target)
46809 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46813 for (i = 0; i < 8; ++i)
46814 mask |= (d->perm[i * 2] >= 16) << i;
46819 target = gen_reg_rtx (vmode);
46820 op0 = gen_lowpart (vmode, op0);
46821 op1 = gen_lowpart (vmode, op1);
46825 /* See if bytes move in pairs. If not, vpblendvb must be used. */
46826 for (i = 0; i < 32; i += 2)
46827 if (d->perm[i] + 1 != d->perm[i + 1])
46829 /* See if bytes move in quadruplets. If yes, vpblendd
46830 with immediate can be used. */
46831 for (i = 0; i < 32; i += 4)
46832 if (d->perm[i] + 2 != d->perm[i + 2])
46836 /* See if bytes move the same in both lanes. If yes,
46837 vpblendw with immediate can be used. */
46838 for (i = 0; i < 16; i += 2)
46839 if (d->perm[i] + 16 != d->perm[i + 16])
46842 /* Use vpblendw. */
46843 for (i = 0; i < 16; ++i)
46844 mask |= (d->perm[i * 2] >= 32) << i;
46849 /* Use vpblendd. */
46850 for (i = 0; i < 8; ++i)
46851 mask |= (d->perm[i * 4] >= 32) << i;
46856 /* See if words move in pairs. If yes, vpblendd can be used. */
46857 for (i = 0; i < 16; i += 2)
46858 if (d->perm[i] + 1 != d->perm[i + 1])
46862 /* See if words move the same in both lanes. If not,
46863 vpblendvb must be used. */
46864 for (i = 0; i < 8; i++)
46865 if (d->perm[i] + 8 != d->perm[i + 8])
46867 /* Use vpblendvb. */
46868 for (i = 0; i < 32; ++i)
46869 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
46873 target = gen_reg_rtx (vmode);
46874 op0 = gen_lowpart (vmode, op0);
46875 op1 = gen_lowpart (vmode, op1);
46876 goto finish_pblendvb;
46879 /* Use vpblendw. */
46880 for (i = 0; i < 16; ++i)
46881 mask |= (d->perm[i] >= 16) << i;
46885 /* Use vpblendd. */
46886 for (i = 0; i < 8; ++i)
46887 mask |= (d->perm[i * 2] >= 16) << i;
46892 /* Use vpblendd. */
46893 for (i = 0; i < 4; ++i)
46894 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46899 gcc_unreachable ();
46902 /* This matches five different patterns with the different modes. */
46903 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
46904 x = gen_rtx_SET (VOIDmode, target, x);
46906 if (target != d->target)
46907 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46912 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46913 in terms of the variable form of vpermilps.
46915 Note that we will have already failed the immediate input vpermilps,
46916 which requires that the high and low part shuffle be identical; the
46917 variable form doesn't require that. */
/* NOTE(review): this excerpt is elided -- source lines are missing between
   the numbered lines; comments describe only the visible code.  */
46920 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
46922 rtx rperm[8], vperm;
/* Only the AVX V8SF one-operand case is handled here.  */
46925 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
46928 /* We can only permute within the 128-bit lane. */
46929 for (i = 0; i < 8; ++i)
46931 unsigned e = d->perm[i];
46932 if (i < 4 ? e >= 4 : e < 4)
46939 for (i = 0; i < 8; ++i)
46941 unsigned e = d->perm[i];
46943 /* Within each 128-bit lane, the elements of op0 are numbered
46944 from 0 and the elements of op1 are numbered from 4. */
46950 rperm[i] = GEN_INT (e);
/* Materialize the selector as a V8SI constant and emit vpermilps.  */
46953 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
46954 vperm = force_reg (V8SImode, vperm);
46955 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
46960 /* Return true if permutation D can be performed as VMODE permutation
/* NOTE(review): this excerpt is elided -- source lines are missing between
   the numbered lines; comments describe only the visible code.  */
46964 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
46966 unsigned int i, j, chunk;
/* Both modes must be integer vectors of the same total size.  */
46968 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
46969 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
46970 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
46973 if (GET_MODE_NUNITS (vmode) >= d->nelt)
/* Each VMODE element covers CHUNK elements of D; the permutation is valid
   in VMODE only if each chunk starts aligned and moves contiguously.  */
46976 chunk = d->nelt / GET_MODE_NUNITS (vmode);
46977 for (i = 0; i < d->nelt; i += chunk)
46978 if (d->perm[i] & (chunk - 1))
46981 for (j = 1; j < chunk; ++j)
46982 if (d->perm[i] + j != d->perm[i + j])
46988 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46989 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
/* NOTE(review): this excerpt is elided -- source lines are missing between
   the numbered lines; comments describe only the visible code.  */
46992 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
46994 unsigned i, nelt, eltsz, mask;
46995 unsigned char perm[64];
46996 machine_mode vmode = V16QImode;
46997 rtx rperm[64], vperm, target, op0, op1;
/* Two-operand permutations need XOP vpperm for 16-byte modes, or the
   vperm2i128 path when the permutation is valid as V2TI.  */
47001 if (!d->one_operand_p)
47003 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
47006 && valid_perm_using_mode_p (V2TImode, d))
47011 /* Use vperm2i128 insn. The pattern uses
47012 V4DImode instead of V2TImode. */
47013 target = d->target;
47014 if (d->vmode != V4DImode)
47015 target = gen_reg_rtx (V4DImode);
47016 op0 = gen_lowpart (V4DImode, d->op0);
47017 op1 = gen_lowpart (V4DImode, d->op1);
47019 = GEN_INT ((d->perm[0] / (nelt / 2))
47020 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
47021 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
47022 if (target != d->target)
47023 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47031 if (GET_MODE_SIZE (d->vmode) == 16)
47036 else if (GET_MODE_SIZE (d->vmode) == 32)
47041 /* V4DImode should be already handled through
47042 expand_vselect by vpermq instruction. */
47043 gcc_assert (d->vmode != V4DImode)
47046 if (d->vmode == V8SImode
47047 || d->vmode == V16HImode
47048 || d->vmode == V32QImode)
47050 /* First see if vpermq can be used for
47051 V8SImode/V16HImode/V32QImode. */
47052 if (valid_perm_using_mode_p (V4DImode, d))
47054 for (i = 0; i < 4; i++)
47055 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
47058 target = gen_reg_rtx (V4DImode);
47059 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
47062 emit_move_insn (d->target,
47063 gen_lowpart (d->vmode, target));
47069 /* Next see if vpermd can be used. */
47070 if (valid_perm_using_mode_p (V8SImode, d))
47073 /* Or if vpermps can be used. */
47074 else if (d->vmode == V8SFmode)
47077 if (vmode == V32QImode)
47079 /* vpshufb only works intra lanes, it is not
47080 possible to shuffle bytes in between the lanes. */
47081 for (i = 0; i < nelt; ++i)
47082 if ((d->perm[i] ^ i) & (nelt / 2))
47086 else if (GET_MODE_SIZE (d->vmode) == 64)
47088 if (!TARGET_AVX512BW)
47091 /* If vpermq didn't work, vpshufb won't work either. */
47092 if (d->vmode == V8DFmode || d->vmode == V8DImode)
47096 if (d->vmode == V16SImode
47097 || d->vmode == V32HImode
47098 || d->vmode == V64QImode)
47100 /* First see if vpermq can be used for
47101 V16SImode/V32HImode/V64QImode. */
47102 if (valid_perm_using_mode_p (V8DImode, d))
47104 for (i = 0; i < 8; i++)
47105 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
47108 target = gen_reg_rtx (V8DImode);
47109 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
47112 emit_move_insn (d->target,
47113 gen_lowpart (d->vmode, target));
47119 /* Next see if vpermd can be used. */
47120 if (valid_perm_using_mode_p (V16SImode, d))
47123 /* Or if vpermps can be used. */
47124 else if (d->vmode == V16SFmode)
47126 if (vmode == V64QImode)
47128 /* vpshufb only works intra lanes, it is not
47129 possible to shuffle bytes in between the lanes. */
47130 for (i = 0; i < nelt; ++i)
47131 if ((d->perm[i] ^ i) & (nelt / 4))
/* Build the selector constant for the vpermd/vpermps-style paths.  */
47142 if (vmode == V8SImode)
47143 for (i = 0; i < 8; ++i)
47144 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
47145 else if (vmode == V16SImode)
47146 for (i = 0; i < 16; ++i)
47147 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
/* Byte-shuffle paths: expand each element index into per-byte indices,
   masked according to how many bytes one pshufb lane can address.  */
47150 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47151 if (!d->one_operand_p)
47152 mask = 2 * nelt - 1;
47153 else if (vmode == V16QImode)
47155 else if (vmode == V64QImode)
47156 mask = nelt / 4 - 1;
47158 mask = nelt / 2 - 1;
47160 for (i = 0; i < nelt; ++i)
47162 unsigned j, e = d->perm[i] & mask;
47163 for (j = 0; j < eltsz; ++j)
47164 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
47168 vperm = gen_rtx_CONST_VECTOR (vmode,
47169 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
47170 vperm = force_reg (vmode, vperm);
47172 target = d->target;
47173 if (d->vmode != vmode)
47174 target = gen_reg_rtx (vmode);
47175 op0 = gen_lowpart (vmode, d->op0);
47176 if (d->one_operand_p)
/* Dispatch on the working mode to the matching shuffle builder.  */
47178 if (vmode == V16QImode)
47179 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
47180 else if (vmode == V32QImode)
47181 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
47182 else if (vmode == V64QImode)
47183 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
47184 else if (vmode == V8SFmode)
47185 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
47186 else if (vmode == V8SImode)
47187 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
47188 else if (vmode == V16SFmode)
47189 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
47190 else if (vmode == V16SImode)
47191 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
47193 gcc_unreachable ();
/* Two-operand case falls through to XOP vpperm.  */
47197 op1 = gen_lowpart (vmode, d->op1);
47198 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
47200 if (target != d->target)
47201 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47206 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
47207 in a single instruction. */
/* NOTE(review): this excerpt is elided -- source lines are missing between
   the numbered lines; comments describe only the visible code.  */
47210 expand_vec_perm_1 (struct expand_vec_perm_d *d)
47212 unsigned i, nelt = d->nelt;
47213 unsigned char perm2[MAX_VECT_LEN];
47215 /* Check plain VEC_SELECT first, because AVX has instructions that could
47216 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
47217 input where SEL+CONCAT may not. */
47218 if (d->one_operand_p)
47220 int mask = nelt - 1;
47221 bool identity_perm = true;
47222 bool broadcast_perm = true;
/* Classify the permutation: identity, broadcast of element 0, or general.  */
47224 for (i = 0; i < nelt; i++)
47226 perm2[i] = d->perm[i] & mask;
47228 identity_perm = false;
47230 broadcast_perm = false;
47236 emit_move_insn (d->target, d->op0);
47239 else if (broadcast_perm && TARGET_AVX2)
47241 /* Use vpbroadcast{b,w,d}. */
47242 rtx (*gen) (rtx, rtx) = NULL;
47246 if (TARGET_AVX512BW)
47247 gen = gen_avx512bw_vec_dupv64qi_1;
47250 gen = gen_avx2_pbroadcastv32qi_1;
47253 if (TARGET_AVX512BW)
47254 gen = gen_avx512bw_vec_dupv32hi_1;
47257 gen = gen_avx2_pbroadcastv16hi_1;
47260 if (TARGET_AVX512F)
47261 gen = gen_avx512f_vec_dupv16si_1;
47264 gen = gen_avx2_pbroadcastv8si_1;
47267 gen = gen_avx2_pbroadcastv16qi;
47270 gen = gen_avx2_pbroadcastv8hi;
47273 if (TARGET_AVX512F)
47274 gen = gen_avx512f_vec_dupv16sf_1;
47277 gen = gen_avx2_vec_dupv8sf_1;
47280 if (TARGET_AVX512F)
47281 gen = gen_avx512f_vec_dupv8df_1;
47284 if (TARGET_AVX512F)
47285 gen = gen_avx512f_vec_dupv8di_1;
47287 /* For other modes prefer other shuffles this function creates. */
47293 emit_insn (gen (d->target, d->op0));
47298 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47301 /* There are plenty of patterns in sse.md that are written for
47302 SEL+CONCAT and are not replicated for a single op. Perhaps
47303 that should be changed, to avoid the nastiness here. */
47305 /* Recognize interleave style patterns, which means incrementing
47306 every other permutation operand. */
47307 for (i = 0; i < nelt; i += 2)
47309 perm2[i] = d->perm[i] & mask;
47310 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47312 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47316 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
47319 for (i = 0; i < nelt; i += 4)
47321 perm2[i + 0] = d->perm[i + 0] & mask;
47322 perm2[i + 1] = d->perm[i + 1] & mask;
47323 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47324 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47327 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47333 /* Finally, try the fully general two operand permute. */
47334 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47338 /* Recognize interleave style patterns with reversed operands. */
47339 if (!d->one_operand_p)
47341 for (i = 0; i < nelt; ++i)
47343 unsigned e = d->perm[i];
47351 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
47356 /* Try the SSE4.1 blend variable merge instructions. */
47357 if (expand_vec_perm_blend (d))
47360 /* Try one of the AVX vpermil variable permutations. */
47361 if (expand_vec_perm_vpermil (d))
47364 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47365 vpshufb, vpermd, vpermps or vpermq variable permutation. */
47366 if (expand_vec_perm_pshufb (d))
47369 /* Try the AVX2 vpalignr instruction. */
47370 if (expand_vec_perm_palignr (d, true))
47373 /* Try the AVX512F vpermi2 instructions. */
47374 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47380 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47381 in terms of a pair of pshuflw + pshufhw instructions. */
/* NOTE(review): this excerpt is elided -- source lines are missing between
   the numbered lines; comments describe only the visible code.  */
47384 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47386 unsigned char perm2[MAX_VECT_LEN];
/* Only one-operand V8HI permutations are candidates.  */
47390 if (d->vmode != V8HImode || !d->one_operand_p)
47393 /* The two permutations only operate in 64-bit lanes. */
47394 for (i = 0; i < 4; ++i)
47395 if (d->perm[i] >= 4)
47397 for (i = 4; i < 8; ++i)
47398 if (d->perm[i] < 4)
47404 /* Emit the pshuflw. */
47405 memcpy (perm2, d->perm, 4);
47406 for (i = 4; i < 8; ++i)
47408 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47411 /* Emit the pshufhw. */
47412 memcpy (perm2 + 4, d->perm + 4, 4);
47413 for (i = 0; i < 4; ++i)
/* Second shuffle reads d->target (result of the first shuffle).  */
47415 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47421 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47422 the permutation using the SSSE3 palignr instruction. This succeeds
47423 when all of the elements in PERM fit within one vector and we merely
47424 need to shift them down so that a single vector permutation has a
47425 chance to succeed. If SINGLE_INSN_ONLY_P, succeed if only
47426 the vpalignr instruction itself can perform the requested permutation. */
/* NOTE(review): this excerpt is elided -- source lines are missing between
   the numbered lines; comments describe only the visible code.  */
47429 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47431 unsigned i, nelt = d->nelt;
47432 unsigned min, max, minswap, maxswap;
47433 bool in_order, ok, swap = false;
47435 struct expand_vec_perm_d dcopy;
47437 /* Even with AVX, palignr only operates on 128-bit vectors,
47438 in AVX2 palignr operates on both 128-bit lanes. */
47439 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47440 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
/* Track the index range used by the permutation, both as given and with
   the operands swapped (index ^ nelt).  */
47445 minswap = 2 * nelt;
47447 for (i = 0; i < nelt; ++i)
47449 unsigned e = d->perm[i];
47450 unsigned eswap = d->perm[i] ^ nelt;
47451 if (GET_MODE_SIZE (d->vmode) == 32)
47453 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47454 eswap = e ^ (nelt / 2);
47460 if (eswap < minswap)
47462 if (eswap > maxswap)
47466 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47468 if (d->one_operand_p
47470 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47471 ? nelt / 2 : nelt))
47478 /* Given that we have SSSE3, we know we'll be able to implement the
47479 single operand permutation after the palignr with pshufb for
47480 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
47482 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
/* Swap op0/op1 and flip the indices when the swapped range was chosen.  */
47488 dcopy.op0 = d->op1;
47489 dcopy.op1 = d->op0;
47490 for (i = 0; i < nelt; ++i)
47491 dcopy.perm[i] ^= nelt;
47495 for (i = 0; i < nelt; ++i)
47497 unsigned e = dcopy.perm[i];
47498 if (GET_MODE_SIZE (d->vmode) == 32
47500 && (e & (nelt / 2 - 1)) < min)
47501 e = e - min - (nelt / 2);
47508 dcopy.one_operand_p = true;
47510 if (single_insn_only_p && !in_order)
47513 /* For AVX2, test whether we can permute the result in one instruction. */
47518 dcopy.op1 = dcopy.op0;
47519 return expand_vec_perm_1 (&dcopy);
/* Emit the actual palignr: shift by MIN elements, in bits.  */
47522 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
47523 if (GET_MODE_SIZE (d->vmode) == 16)
47525 target = gen_reg_rtx (TImode);
47526 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47527 gen_lowpart (TImode, dcopy.op0), shift));
47531 target = gen_reg_rtx (V2TImode);
47532 emit_insn (gen_avx2_palignrv2ti (target,
47533 gen_lowpart (V2TImode, dcopy.op1),
47534 gen_lowpart (V2TImode, dcopy.op0),
47538 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47540 /* Test for the degenerate case where the alignment by itself
47541 produces the desired permutation. */
47544 emit_move_insn (d->target, dcopy.op0);
47548 ok = expand_vec_perm_1 (&dcopy);
47549 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
47554 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47555 the permutation using the SSE4_1 pblendv instruction. Potentially
47556 reduces permutation from 2 pshufb and or to 1 pshufb and pblendv. */
/* NOTE(review): this excerpt is elided -- source lines are missing between
   the numbered lines; comments describe only the visible code.  */
47559 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
47561 unsigned i, which, nelt = d->nelt;
47562 struct expand_vec_perm_d dcopy, dcopy1;
47563 machine_mode vmode = d->vmode;
47566 /* Use the same checks as in expand_vec_perm_blend. */
47567 if (d->one_operand_p)
47569 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47571 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47573 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47578 /* Figure out where permutation elements stay not in their
47579 respective lanes. */
47580 for (i = 0, which = 0; i < nelt; ++i)
47582 unsigned e = d->perm[i];
/* WHICH accumulates bit 1 for out-of-lane elements from op0, bit 2 for
   out-of-lane elements from op1.  */
47584 which |= (e < nelt ? 1 : 2);
47586 /* We can pblend the part where elements stay not in their
47587 respective lanes only when these elements are all in one
47588 half of a permutation.
47589 {0 1 8 3 4 5 9 7} is ok as 8, 9 are at not at their respective
47590 lanes, but both 8 and 9 >= 8
47591 {0 1 8 3 4 5 2 7} is not ok as 2 and 8 are not at their
47592 respective lanes and 8 >= 8, but 2 not. */
47593 if (which != 1 && which != 2)
47595 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
47598 /* First we apply one operand permutation to the part where
47599 elements stay not in their respective lanes. */
47602 dcopy.op0 = dcopy.op1 = d->op1;
47604 dcopy.op0 = dcopy.op1 = d->op0;
47606 dcopy.target = gen_reg_rtx (vmode);
47607 dcopy.one_operand_p = true;
47609 for (i = 0; i < nelt; ++i)
47610 dcopy.perm[i] = d->perm[i] & (nelt - 1);
47612 ok = expand_vec_perm_1 (&dcopy);
47613 if (GET_MODE_SIZE (vmode) != 16 && !ok)
47620 /* Next we put permuted elements into their positions. */
47623 dcopy1.op1 = dcopy.target;
47625 dcopy1.op0 = dcopy.target;
/* The second step is a pure blend: element i comes from op1 iff the
   original index selected the second operand.  */
47627 for (i = 0; i < nelt; ++i)
47628 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
47630 ok = expand_vec_perm_blend (&dcopy1);
47636 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
47638 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47639 a two vector permutation into a single vector permutation by using
47640 an interleave operation to merge the vectors. */
/* NOTE(review): this excerpt is elided -- source lines are missing between
   the numbered lines; comments describe only the visible code.  */
47643 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
47645 struct expand_vec_perm_d dremap, dfinal;
47646 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
47647 unsigned HOST_WIDE_INT contents;
47648 unsigned char remap[2 * MAX_VECT_LEN];
47650 bool ok, same_halves = false;
47652 if (GET_MODE_SIZE (d->vmode) == 16)
47654 if (d->one_operand_p)
47657 else if (GET_MODE_SIZE (d->vmode) == 32)
47661 /* For 32-byte modes allow even d->one_operand_p.
47662 The lack of cross-lane shuffling in some instructions
47663 might prevent a single insn shuffle. */
47665 dfinal.testing_p = true;
47666 /* If expand_vec_perm_interleave3 can expand this into
47667 a 3 insn sequence, give up and let it be expanded as
47668 3 insn sequence. While that is one insn longer,
47669 it doesn't need a memory operand and in the common
47670 case that both interleave low and high permutations
47671 with the same operands are adjacent needs 4 insns
47672 for both after CSE. */
47673 if (expand_vec_perm_interleave3 (&dfinal))
47679 /* Examine from whence the elements come. */
/* CONTENTS is a bitmask over the 2*nelt possible source indices.  */
47681 for (i = 0; i < nelt; ++i)
47682 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
47684 memset (remap, 0xff, sizeof (remap));
47687 if (GET_MODE_SIZE (d->vmode) == 16)
47689 unsigned HOST_WIDE_INT h1, h2, h3, h4;
47691 /* Split the two input vectors into 4 halves. */
47692 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
47697 /* If the elements from the low halves use interleave low, and similarly
47698 for interleave high. If the elements are from mis-matched halves, we
47699 can use shufps for V4SF/V4SI or do a DImode shuffle. */
47700 if ((contents & (h1 | h3)) == contents)
47703 for (i = 0; i < nelt2; ++i)
47706 remap[i + nelt] = i * 2 + 1;
47707 dremap.perm[i * 2] = i;
47708 dremap.perm[i * 2 + 1] = i + nelt;
47710 if (!TARGET_SSE2 && d->vmode == V4SImode)
47711 dremap.vmode = V4SFmode;
47713 else if ((contents & (h2 | h4)) == contents)
47716 for (i = 0; i < nelt2; ++i)
47718 remap[i + nelt2] = i * 2;
47719 remap[i + nelt + nelt2] = i * 2 + 1;
47720 dremap.perm[i * 2] = i + nelt2;
47721 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
47723 if (!TARGET_SSE2 && d->vmode == V4SImode)
47724 dremap.vmode = V4SFmode;
47726 else if ((contents & (h1 | h4)) == contents)
47729 for (i = 0; i < nelt2; ++i)
47732 remap[i + nelt + nelt2] = i + nelt2;
47733 dremap.perm[i] = i;
47734 dremap.perm[i + nelt2] = i + nelt + nelt2;
47739 dremap.vmode = V2DImode;
47741 dremap.perm[0] = 0;
47742 dremap.perm[1] = 3;
47745 else if ((contents & (h2 | h3)) == contents)
47748 for (i = 0; i < nelt2; ++i)
47750 remap[i + nelt2] = i;
47751 remap[i + nelt] = i + nelt2;
47752 dremap.perm[i] = i + nelt2;
47753 dremap.perm[i + nelt2] = i + nelt;
47758 dremap.vmode = V2DImode;
47760 dremap.perm[0] = 1;
47761 dremap.perm[1] = 2;
/* 32-byte case: split each operand into quarters (128-bit lanes).  */
47769 unsigned int nelt4 = nelt / 4, nzcnt = 0;
47770 unsigned HOST_WIDE_INT q[8];
47771 unsigned int nonzero_halves[4];
47773 /* Split the two input vectors into 8 quarters. */
47774 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
47775 for (i = 1; i < 8; ++i)
47776 q[i] = q[0] << (nelt4 * i);
47777 for (i = 0; i < 4; ++i)
47778 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
47780 nonzero_halves[nzcnt] = i;
47786 gcc_assert (d->one_operand_p);
47787 nonzero_halves[1] = nonzero_halves[0];
47788 same_halves = true;
47790 else if (d->one_operand_p)
47792 gcc_assert (nonzero_halves[0] == 0);
47793 gcc_assert (nonzero_halves[1] == 1);
47798 if (d->perm[0] / nelt2 == nonzero_halves[1])
47800 /* Attempt to increase the likelihood that dfinal
47801 shuffle will be intra-lane. */
47802 char tmph = nonzero_halves[0];
47803 nonzero_halves[0] = nonzero_halves[1];
47804 nonzero_halves[1] = tmph;
47807 /* vperm2f128 or vperm2i128. */
47808 for (i = 0; i < nelt2; ++i)
47810 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
47811 remap[i + nonzero_halves[0] * nelt2] = i;
47812 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
47813 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
47816 if (d->vmode != V8SFmode
47817 && d->vmode != V4DFmode
47818 && d->vmode != V8SImode)
47820 dremap.vmode = V8SImode;
47822 for (i = 0; i < 4; ++i)
47824 dremap.perm[i] = i + nonzero_halves[0] * 4;
47825 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
47829 else if (d->one_operand_p)
47831 else if (TARGET_AVX2
47832 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
47835 for (i = 0; i < nelt4; ++i)
47838 remap[i + nelt] = i * 2 + 1;
47839 remap[i + nelt2] = i * 2 + nelt2;
47840 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
47841 dremap.perm[i * 2] = i;
47842 dremap.perm[i * 2 + 1] = i + nelt;
47843 dremap.perm[i * 2 + nelt2] = i + nelt2;
47844 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
47847 else if (TARGET_AVX2
47848 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
47851 for (i = 0; i < nelt4; ++i)
47853 remap[i + nelt4] = i * 2;
47854 remap[i + nelt + nelt4] = i * 2 + 1;
47855 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
47856 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
47857 dremap.perm[i * 2] = i + nelt4;
47858 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
47859 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
47860 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
47867 /* Use the remapping array set up above to move the elements from their
47868 swizzled locations into their final destinations. */
47870 for (i = 0; i < nelt; ++i)
47872 unsigned e = remap[d->perm[i]];
47873 gcc_assert (e < nelt);
47874 /* If same_halves is true, both halves of the remapped vector are the
47875 same. Avoid cross-lane accesses if possible. */
47876 if (same_halves && i >= nelt2)
47878 gcc_assert (e < nelt2);
47879 dfinal.perm[i] = e + nelt2;
47882 dfinal.perm[i] = e;
47886 dremap.target = gen_reg_rtx (dremap.vmode);
47887 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47889 dfinal.op1 = dfinal.op0;
47890 dfinal.one_operand_p = true;
47892 /* Test if the final remap can be done with a single insn. For V4SFmode or
47893 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
47895 ok = expand_vec_perm_1 (&dfinal);
47896 seq = get_insns ();
47905 if (dremap.vmode != dfinal.vmode)
47907 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
47908 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
47911 ok = expand_vec_perm_1 (&dremap);
47918 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47919 a single vector cross-lane permutation into vpermq followed
47920 by any of the single insn permutations. */
/* NOTE(review): this excerpt is elided -- source lines are missing between
   the numbered lines; comments describe only the visible code.  */
47923 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
47925 struct expand_vec_perm_d dremap, dfinal;
47926 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
47927 unsigned contents[2];
/* Only one-operand V32QI/V16HI permutations are candidates here.  */
47931 && (d->vmode == V32QImode || d->vmode == V16HImode)
47932 && d->one_operand_p))
/* Record which of the four 64-bit quarters each result half draws from.  */
47937 for (i = 0; i < nelt2; ++i)
47939 contents[0] |= 1u << (d->perm[i] / nelt4);
47940 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
/* Each result half may use at most two source quarters.  */
47943 for (i = 0; i < 2; ++i)
47945 unsigned int cnt = 0;
47946 for (j = 0; j < 4; ++j)
47947 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
/* First step: vpermq (V4DI) gathers the needed quarters.  */
47955 dremap.vmode = V4DImode;
47957 dremap.target = gen_reg_rtx (V4DImode);
47958 dremap.op0 = gen_lowpart (V4DImode, d->op0);
47959 dremap.op1 = dremap.op0;
47960 dremap.one_operand_p = true;
47961 for (i = 0; i < 2; ++i)
47963 unsigned int cnt = 0;
47964 for (j = 0; j < 4; ++j)
47965 if ((contents[i] & (1u << j)) != 0)
47966 dremap.perm[2 * i + cnt++] = j;
47967 for (; cnt < 2; ++cnt)
47968 dremap.perm[2 * i + cnt] = 0;
/* Second step: an intra-lane single-insn permutation of the result.  */
47972 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47973 dfinal.op1 = dfinal.op0;
47974 dfinal.one_operand_p = true;
47975 for (i = 0, j = 0; i < nelt; ++i)
47979 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
47980 if ((d->perm[i] / nelt4) == dremap.perm[j])
47982 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
47983 dfinal.perm[i] |= nelt4;
47985 gcc_unreachable ();
47988 ok = expand_vec_perm_1 (&dremap);
47991 ok = expand_vec_perm_1 (&dfinal);
47997 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
47998 a vector permutation using two instructions, vperm2f128 resp.
47999 vperm2i128 followed by any single in-lane permutation. */
/* NOTE(review): this excerpt is elided -- source lines are missing between
   the numbered lines; comments describe only the visible code.  */
48002 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
48004 struct expand_vec_perm_d dfirst, dsecond;
48005 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
/* Only 32-byte modes; integer modes additionally require AVX2.  */
48009 || GET_MODE_SIZE (d->vmode) != 32
48010 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
48014 dsecond.one_operand_p = false;
48015 dsecond.testing_p = true;
48017 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
48018 immediate. For perm < 16 the second permutation uses
48019 d->op0 as first operand, for perm >= 16 it uses d->op1
48020 as first operand. The second operand is the result of
48022 for (perm = 0; perm < 32; perm++)
48024 /* Ignore permutations which do not move anything cross-lane. */
48027 /* The second shuffle for e.g. V4DFmode has
48028 0123 and ABCD operands.
48029 Ignore AB23, as 23 is already in the second lane
48030 of the first operand. */
48031 if ((perm & 0xc) == (1 << 2)) continue;
48032 /* And 01CD, as 01 is in the first lane of the first
48034 if ((perm & 3) == 0) continue;
48035 /* And 4567, as then the vperm2[fi]128 doesn't change
48036 anything on the original 4567 second operand. */
48037 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
48041 /* The second shuffle for e.g. V4DFmode has
48042 4567 and ABCD operands.
48043 Ignore AB67, as 67 is already in the second lane
48044 of the first operand. */
48045 if ((perm & 0xc) == (3 << 2)) continue;
48046 /* And 45CD, as 45 is in the first lane of the first
48048 if ((perm & 3) == 2) continue;
48049 /* And 0123, as then the vperm2[fi]128 doesn't change
48050 anything on the original 0123 first operand. */
48051 if ((perm & 0xf) == (1 << 2)) continue;
/* Build the candidate second permutation for this vperm2f128 immediate.  */
48054 for (i = 0; i < nelt; i++)
48056 j = d->perm[i] / nelt2;
48057 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
48058 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
48059 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
48060 dsecond.perm[i] = d->perm[i] & (nelt - 1);
/* Test whether that second permutation is a single insn.  */
48068 ok = expand_vec_perm_1 (&dsecond);
48079 /* Found a usable second shuffle. dfirst will be
48080 vperm2f128 on d->op0 and d->op1. */
48081 dsecond.testing_p = false;
48083 dfirst.target = gen_reg_rtx (d->vmode);
48084 for (i = 0; i < nelt; i++)
48085 dfirst.perm[i] = (i & (nelt2 - 1))
48086 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
48088 canonicalize_perm (&dfirst);
48089 ok = expand_vec_perm_1 (&dfirst);
48092 /* And dsecond is some single insn shuffle, taking
48093 d->op0 and result of vperm2f128 (if perm < 16) or
48094 d->op1 and result of vperm2f128 (otherwise). */
48096 dsecond.op0 = dsecond.op1;
48097 dsecond.op1 = dfirst.target;
48099 ok = expand_vec_perm_1 (&dsecond);
48105 /* For one operand, the only useful vperm2f128 permutation is 0x01
48107 if (d->one_operand_p)
48114 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48115 a two vector permutation using 2 intra-lane interleave insns
48116 and cross-lane shuffle for 32-byte vectors. */
/* NOTE(review): this excerpt is elided -- source lines are missing between
   the numbered lines; comments describe only the visible code.  */
48119 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
48122 rtx (*gen) (rtx, rtx, rtx);
48124 if (d->one_operand_p)
48126 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
48128 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
/* The permutation must be an interleave: pairs (op0[k], op1[k]) starting
   at element 0 (interleave low) or nelt/2 (interleave high).  */
48134 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
48136 for (i = 0; i < nelt; i += 2)
48137 if (d->perm[i] != d->perm[0] + i / 2
48138 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
/* Select the mode-specific interleave generator (high vs. low).  */
48148 gen = gen_vec_interleave_highv32qi;
48150 gen = gen_vec_interleave_lowv32qi;
48154 gen = gen_vec_interleave_highv16hi;
48156 gen = gen_vec_interleave_lowv16hi;
48160 gen = gen_vec_interleave_highv8si;
48162 gen = gen_vec_interleave_lowv8si;
48166 gen = gen_vec_interleave_highv4di;
48168 gen = gen_vec_interleave_lowv4di;
48172 gen = gen_vec_interleave_highv8sf;
48174 gen = gen_vec_interleave_lowv8sf;
48178 gen = gen_vec_interleave_highv4df;
48180 gen = gen_vec_interleave_lowv4df;
48183 gcc_unreachable ();
48186 emit_insn (gen (d->target, d->op0, d->op1));
48190 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
48191 a single vector permutation using a single intra-lane vector
48192 permutation, vperm2f128 swapping the lanes and vblend* insn blending
48193 the non-swapped and swapped vectors together. */
/* NOTE(review): this excerpt is elided -- source lines are missing between
   the numbered lines; comments describe only the visible code.  */
48196 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
48198 struct expand_vec_perm_d dfirst, dsecond;
48199 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
48202 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
/* Only one-operand V8SF/V4DF permutations are candidates.  */
48206 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
48207 || !d->one_operand_p)
/* Build DFIRST: an intra-lane permutation; 0xff marks unassigned slots.  */
48211 for (i = 0; i < nelt; i++)
48212 dfirst.perm[i] = 0xff;
48213 for (i = 0, msk = 0; i < nelt; i++)
48215 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
48216 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
48218 dfirst.perm[j] = d->perm[i];
/* Fill remaining slots with identity.  */
48222 for (i = 0; i < nelt; i++)
48223 if (dfirst.perm[i] == 0xff)
48224 dfirst.perm[i] = i;
48227 dfirst.target = gen_reg_rtx (dfirst.vmode);
48230 ok = expand_vec_perm_1 (&dfirst);
48231 seq = get_insns ();
/* DSECOND swaps the 128-bit lanes of DFIRST's result.  */
48243 dsecond.op0 = dfirst.target;
48244 dsecond.op1 = dfirst.target;
48245 dsecond.one_operand_p = true;
48246 dsecond.target = gen_reg_rtx (dsecond.vmode);
48247 for (i = 0; i < nelt; i++)
48248 dsecond.perm[i] = i ^ nelt2;
48250 ok = expand_vec_perm_1 (&dsecond);
/* Blend the non-swapped and swapped results under mask MSK.  */
48253 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48254 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
/* NOTE(review): elided excerpt -- header line, braces and the final
   success/failure handling are missing from this view.  */
48258 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement a V4DF
48259    permutation using two vperm2f128, followed by a vshufpd insn blending
48260    the two vectors together.  */
48263 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48265   struct expand_vec_perm_d dfirst, dsecond, dthird;
/* V4DFmode with AVX only.  */
48268   if (!TARGET_AVX || (d->vmode != V4DFmode))
/* dfirst gathers the even-position source pairs, dsecond the odd ones;
   each pair (x & ~1, (x & ~1) + 1) is a whole 128-bit lane.  */
48278   dfirst.perm[0] = (d->perm[0] & ~1);
48279   dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48280   dfirst.perm[2] = (d->perm[2] & ~1);
48281   dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48282   dsecond.perm[0] = (d->perm[1] & ~1);
48283   dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48284   dsecond.perm[2] = (d->perm[3] & ~1);
48285   dsecond.perm[3] = (d->perm[3] & ~1) + 1;
/* dthird is the final vshufpd-style blend: low bit of each original
   index selects within a lane; +4/+2/+6 pick dsecond/lane offsets.  */
48286   dthird.perm[0] = (d->perm[0] % 2);
48287   dthird.perm[1] = (d->perm[1] % 2) + 4;
48288   dthird.perm[2] = (d->perm[2] % 2) + 2;
48289   dthird.perm[3] = (d->perm[3] % 2) + 6;
48291   dfirst.target = gen_reg_rtx (dfirst.vmode);
48292   dsecond.target = gen_reg_rtx (dsecond.vmode);
48293   dthird.op0 = dfirst.target;
48294   dthird.op1 = dsecond.target;
48295   dthird.one_operand_p = false;
48297   canonicalize_perm (&dfirst);
48298   canonicalize_perm (&dsecond);
/* All three sub-permutations must be single-insn expandable.  */
48300   ok = expand_vec_perm_1 (&dfirst)
48301        && expand_vec_perm_1 (&dsecond)
48302        && expand_vec_perm_1 (&dthird);
/* NOTE(review): elided excerpt -- header line, braces, testing_p early
   return and final `return true;` are missing from this view.  */
48309 /* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
48310    permutation with two pshufb insns and an ior.  We should have already
48311    failed all two instruction sequences.  */
48314 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48316   rtx rperm[2][16], vperm, l, h, op, m128;
48317   unsigned int i, nelt, eltsz;
/* SSSE3 pshufb works on 16-byte vectors only.  */
48319   if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48321   gcc_assert (!d->one_operand_p);
48327   eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48329   /* Generate two permutation masks.  If the required element is within
48330      the given vector it is shuffled into the proper lane.  If the required
48331      element is in the other vector, force a zero into the lane by setting
48332      bit 7 in the permutation mask.  */
48333   m128 = GEN_INT (-128);
48334   for (i = 0; i < nelt; ++i)
48336       unsigned j, e = d->perm[i];
/* which = 1 when the element comes from op1, 0 for op0.  */
48337       unsigned which = (e >= nelt);
48341       for (j = 0; j < eltsz; ++j)
48343 	  rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
/* Zero the lane in the other mask (bit 7 set => pshufb writes 0).  */
48344 	  rperm[1-which][i*eltsz + j] = m128;
/* pshufb op0 with mask 0 -> l.  */
48348   vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48349   vperm = force_reg (V16QImode, vperm);
48351   l = gen_reg_rtx (V16QImode);
48352   op = gen_lowpart (V16QImode, d->op0);
48353   emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
/* pshufb op1 with mask 1 -> h.  */
48355   vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48356   vperm = force_reg (V16QImode, vperm);
48358   h = gen_reg_rtx (V16QImode);
48359   op = gen_lowpart (V16QImode, d->op1);
48360   emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
/* OR the two halves; use a temp when the target mode differs.  */
48363   if (d->vmode != V16QImode)
48364     op = gen_reg_rtx (V16QImode);
48365   emit_insn (gen_iorv16qi3 (op, l, h));
48366   if (op != d->target)
48367     emit_move_insn (d->target, gen_lowpart (d->vmode, op));
/* NOTE(review): elided excerpt -- header line, braces, TARGET_AVX2
   guard (first half of the condition) and testing_p return are missing
   from this view.  */
48372 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
48373    with two vpshufb insns, vpermq and vpor.  We should have already failed
48374    all two or three instruction sequences.  */
48377 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48379   rtx rperm[2][32], vperm, l, h, hp, op, m128;
48380   unsigned int i, nelt, eltsz;
/* One-operand V32QI/V16HI only.  */
48383       || !d->one_operand_p
48384       || (d->vmode != V32QImode && d->vmode != V16HImode))
48391   eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48393   /* Generate two permutation masks.  If the required element is within
48394      the same lane, it is shuffled in.  If the required element from the
48395      other lane, force a zero by setting bit 7 in the permutation mask.
48396      In the other mask the mask has non-negative elements if element
48397      is requested from the other lane, but also moved to the other lane,
48398      so that the result of vpshufb can have the two V2TImode halves
48400   m128 = GEN_INT (-128);
48401   for (i = 0; i < nelt; ++i)
/* e = index within a 128-bit lane; which != 0 iff the element must
   cross lanes (scaled to a byte offset of 16).  */
48403       unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48404       unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48406       for (j = 0; j < eltsz; ++j)
48408 	  rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48409 	  rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
/* Cross-lane elements: pshufb into the wrong lane first (mask 1)...  */
48413   vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48414   vperm = force_reg (V32QImode, vperm);
48416   h = gen_reg_rtx (V32QImode);
48417   op = gen_lowpart (V32QImode, d->op0);
48418   emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48420   /* Swap the 128-byte lanes of h into hp.  */
48421   hp = gen_reg_rtx (V4DImode);
48422   op = gen_lowpart (V4DImode, h);
/* vpermq with selector { 2, 3, 0, 1 } swaps the two 128-bit lanes
   (fourth selector argument is on an elided line).  */
48423   emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
/* ...then in-lane elements with mask 0 -> l.  */
48426   vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48427   vperm = force_reg (V32QImode, vperm);
48429   l = gen_reg_rtx (V32QImode);
48430   op = gen_lowpart (V32QImode, d->op0);
48431   emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
/* Combine in-lane and lane-swapped parts with vpor.  */
48434   if (d->vmode != V32QImode)
48435     op = gen_reg_rtx (V32QImode);
48436   emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48437   if (op != d->target)
48438     emit_move_insn (d->target, gen_lowpart (d->vmode, op));
/* NOTE(review): elided excerpt -- header line, braces, TARGET_AVX2
   guard, testing_p return and `return true;` are missing from this
   view.  */
48443 /* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
48444    and extract-odd permutations of two V32QImode and V16QImode operand
48445    with two vpshufb insns, vpor and vpermq.  We should have already
48446    failed all two or three instruction sequences.  */
48449 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48451   rtx rperm[2][32], vperm, l, h, ior, op, m128;
48452   unsigned int i, nelt, eltsz;
/* Two-operand V32QI/V16HI only.  */
48455       || d->one_operand_p
48456       || (d->vmode != V32QImode && d->vmode != V16HImode))
/* Only pure even/odd extraction patterns qualify.  */
48459   for (i = 0; i < d->nelt; ++i)
48460     if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48467   eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48469   /* Generate two permutation masks.  In the first permutation mask
48470      the first quarter will contain indexes for the first half
48471      of the op0, the second quarter will contain bit 7 set, third quarter
48472      will contain indexes for the second half of the op0 and the
48473      last quarter bit 7 set.  In the second permutation mask
48474      the first quarter will contain bit 7 set, the second quarter
48475      indexes for the first half of the op1, the third quarter bit 7 set
48476      and last quarter indexes for the second half of the op1.
48477      I.e. the first mask e.g. for V32QImode extract even will be:
48478      0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48479      (all values masked with 0xf except for -128) and second mask
48480      for extract even will be
48481      -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe.  */
48482   m128 = GEN_INT (-128);
48483   for (i = 0; i < nelt; ++i)
48485       unsigned j, e = d->perm[i] & (nelt / 2 - 1);
/* which selects the op0 (0) or op1 (1) mask; xorv moves the middle
   quarters so the later vpermq can fix the ordering.  */
48486       unsigned which = d->perm[i] >= nelt;
48487       unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48489       for (j = 0; j < eltsz; ++j)
48491 	  rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48492 	  rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
/* pshufb op0 with mask 0.  */
48496   vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48497   vperm = force_reg (V32QImode, vperm);
48499   l = gen_reg_rtx (V32QImode);
48500   op = gen_lowpart (V32QImode, d->op0);
48501   emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
/* pshufb op1 with mask 1.  */
48503   vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48504   vperm = force_reg (V32QImode, vperm);
48506   h = gen_reg_rtx (V32QImode);
48507   op = gen_lowpart (V32QImode, d->op1);
48508   emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
/* Merge the non-overlapping halves.  */
48510   ior = gen_reg_rtx (V32QImode);
48511   emit_insn (gen_iorv32qi3 (ior, l, h));
48513   /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation.  */
48514   op = gen_reg_rtx (V4DImode);
48515   ior = gen_lowpart (V4DImode, ior);
48516   emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48517 				  const1_rtx, GEN_INT (3)));
48518   emit_move_insn (d->target, gen_lowpart (d->vmode, op));
/* NOTE(review): elided excerpt -- header line, the mode switch labels,
   several guards/returns, and the shift-count arguments are missing
   from this view.  */
48523 /* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
48524    and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
48525    with two "and" and "pack" or two "shift" and "pack" insns.  We should
48526    have already failed all two instruction sequences.  */
48529 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
48531   rtx op, dop0, dop1, t, rperm[16];
48532   unsigned i, odd, c, s, nelt = d->nelt;
48533   bool end_perm = false;
48534   machine_mode half_mode;
/* Expander triples selected per mode below.  */
48535   rtx (*gen_and) (rtx, rtx, rtx);
48536   rtx (*gen_pack) (rtx, rtx, rtx);
48537   rtx (*gen_shift) (rtx, rtx, rtx);
48539   if (d->one_operand_p)
/* V8HImode case (switch label elided).  */
48545       /* Required for "pack".  */
48546       if (!TARGET_SSE4_1)
48550       half_mode = V4SImode;
48551       gen_and = gen_andv4si3;
48552       gen_pack = gen_sse4_1_packusdw;
48553       gen_shift = gen_lshrv4si3;
/* V16QImode case.  */
48556       /* No check as all instructions are SSE2.  */
48559       half_mode = V8HImode;
48560       gen_and = gen_andv8hi3;
48561       gen_pack = gen_sse2_packuswb;
48562       gen_shift = gen_lshrv8hi3;
/* V16HImode case (AVX2; 256-bit pack needs a final vpermq).  */
48569       half_mode = V8SImode;
48570       gen_and = gen_andv8si3;
48571       gen_pack = gen_avx2_packusdw;
48572       gen_shift = gen_lshrv8si3;
/* V32QImode case.  */
48580       half_mode = V16HImode;
48581       gen_and = gen_andv16hi3;
48582       gen_pack = gen_avx2_packuswb;
48583       gen_shift = gen_lshrv16hi3;
48587       /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
48588 	 general shuffles.  */
48592   /* Check that permutation is even or odd.  */
48597   for (i = 1; i < nelt; ++i)
48598     if (d->perm[i] != 2 * i + odd)
48604   dop0 = gen_reg_rtx (half_mode);
48605   dop1 = gen_reg_rtx (half_mode);
/* Even extraction: mask the low half of each wide element (constant c
   is set on an elided line -- presumably 0xff/0xffff) and pack.  */
48608       for (i = 0; i < nelt / 2; i++)
48609 	rperm[i] = GEN_INT (c);
48610       t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
48611       t = force_reg (half_mode, t);
48612       emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
48613       emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
/* Odd extraction: logical right shift brings odd elements low (shift
   amount argument is on an elided line).  */
48617       emit_insn (gen_shift (dop0,
48618 			    gen_lowpart (half_mode, d->op0),
48620       emit_insn (gen_shift (dop1,
48621 			    gen_lowpart (half_mode, d->op1),
48624   /* In AVX2 for 256 bit case we need to permute pack result.  */
48625   if (TARGET_AVX2 && end_perm)
48627       op = gen_reg_rtx (d->vmode);
48628       t = gen_reg_rtx (V4DImode);
48629       emit_insn (gen_pack (op, dop0, dop1));
48630       emit_insn (gen_avx2_permv4di_1 (t,
48631 				      gen_lowpart (V4DImode, op),
48636       emit_move_insn (d->target, gen_lowpart (d->vmode, t));
48639     emit_insn (gen_pack (d->target, dop0, dop1));
/* NOTE(review): heavily elided excerpt -- the dispatching switch on
   d->vmode, its case labels, braces and many returns are missing; each
   commented section below corresponds to one mode's strategy.  */
48644 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement extract-even
48645    and extract-odd permutations.  */
48648 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
48650   rtx t1, t2, t3, t4, t5;
/* V4DF case.  */
48657       t1 = gen_reg_rtx (V4DFmode);
48658       t2 = gen_reg_rtx (V4DFmode);
48660       /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
48661       emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
48662       emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
48664       /* Now an unpck[lh]pd will produce the result required.  */
48666 	t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
48668 	t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
/* V8SF case: shufps/vperm2f128 ladder.  */
48674 	int mask = odd ? 0xdd : 0x88;
48678 	t1 = gen_reg_rtx (V8SFmode);
48679 	t2 = gen_reg_rtx (V8SFmode);
48680 	t3 = gen_reg_rtx (V8SFmode);
48682 	/* Shuffle within the 128-bit lanes to produce:
48683 	   { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }.  */
48684 	emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
48687 	/* Shuffle the lanes around to produce:
48688 	   { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }.  */
48689 	emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
48692 	/* Shuffle within the 128-bit lanes to produce:
48693 	   { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }.  */
48694 	emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
48696 	/* Shuffle within the 128-bit lanes to produce:
48697 	   { 8 a c e c e 8 a } | { 9 b d f d f 9 b }.  */
48698 	emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
48700 	/* Shuffle the lanes around to produce:
48701 	   { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }.  */
48702 	emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
/* Small modes (V2DF/V4SF etc. -- labels elided).  */
48711       /* These are always directly implementable by expand_vec_perm_1.  */
48712       gcc_unreachable ();
/* V8HI case: prefer pack, then pshufb2, else interleave ladder.  */
48716 	return expand_vec_perm_even_odd_pack (d);
48717       else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
48718 	return expand_vec_perm_pshufb2 (d);
48723 	  /* We need 2*log2(N)-1 operations to achieve odd/even
48724 	     with interleave.  */
48725 	  t1 = gen_reg_rtx (V8HImode);
48726 	  t2 = gen_reg_rtx (V8HImode);
48727 	  emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
48728 	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
48729 	  emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
48730 	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
48732 	    t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
48734 	    t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
/* V16QI / V16HI / V32QI cases delegate to the pack strategy.  */
48740       return expand_vec_perm_even_odd_pack (d);
48744       return expand_vec_perm_even_odd_pack (d);
/* V4DI case: retry as V4DF (no integer vperm2f128 pre-AVX2).  */
48749 	  struct expand_vec_perm_d d_copy = *d;
48750 	  d_copy.vmode = V4DFmode;
48752 	    d_copy.target = gen_lowpart (V4DFmode, d->target);
48754 	    d_copy.target = gen_reg_rtx (V4DFmode);
48755 	  d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
48756 	  d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
48757 	  if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48760 		emit_move_insn (d->target,
48761 				gen_lowpart (V4DImode, d_copy.target));
/* V4DI with AVX2: vperm2ti + vpunpck.  */
48770       t1 = gen_reg_rtx (V4DImode);
48771       t2 = gen_reg_rtx (V4DImode);
48773       /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
48774       emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
48775       emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
48777       /* Now an vpunpck[lh]qdq will produce the result required.  */
48779 	t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
48781 	t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
/* V8SI case: retry as V8SF when possible.  */
48788 	  struct expand_vec_perm_d d_copy = *d;
48789 	  d_copy.vmode = V8SFmode;
48791 	    d_copy.target = gen_lowpart (V8SFmode, d->target);
48793 	    d_copy.target = gen_reg_rtx (V8SFmode);
48794 	  d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
48795 	  d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
48796 	  if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48799 		emit_move_insn (d->target,
48800 				gen_lowpart (V8SImode, d_copy.target));
/* V8SI with AVX2: permv2ti + pshufd + vpunpck.  */
48809       t1 = gen_reg_rtx (V8SImode);
48810       t2 = gen_reg_rtx (V8SImode);
48811       t3 = gen_reg_rtx (V4DImode);
48812       t4 = gen_reg_rtx (V4DImode);
48813       t5 = gen_reg_rtx (V4DImode);
48815       /* Shuffle the lanes around into
48816 	 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }.  */
48817       emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
48818 				    gen_lowpart (V4DImode, d->op1),
48820       emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
48821 				    gen_lowpart (V4DImode, d->op1),
48824       /* Swap the 2nd and 3rd position in each lane into
48825 	 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }.  */
48826       emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
48827 				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48828       emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
48829 				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48831       /* Now an vpunpck[lh]qdq will produce
48832 	 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }.  */
48834 	t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
48835 					   gen_lowpart (V4DImode, t2));
48837 	t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
48838 					  gen_lowpart (V4DImode, t2));
48840       emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
48844       gcc_unreachable ();
/* NOTE(review): elided excerpt -- header line, braces, the line
   initializing `odd' from d->perm[0] and the failure returns are
   missing from this view.  */
48850 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
48851    extract-even and extract-odd permutations.  */
48854 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
48856   unsigned i, odd, nelt = d->nelt;
/* odd must be 0 (even extraction) or 1 (odd extraction).  */
48859   if (odd != 0 && odd != 1)
/* Whole permutation must follow perm[i] == 2*i + odd.  */
48862   for (i = 1; i < nelt; ++i)
48863     if (d->perm[i] != 2 * i + odd)
48866   return expand_vec_perm_even_odd_1 (d, odd);
/* NOTE(review): elided excerpt -- header line, the switch on vmode and
   its labels, braces and returns are missing from this view.  */
48869 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement broadcast
48870    permutations.  We assume that expand_vec_perm_1 has already failed.  */
48873 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
/* elt = element to broadcast to every lane.  */
48875   unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
48876   machine_mode vmode = d->vmode;
48877   unsigned char perm2[4];
48878   rtx op0 = d->op0, dest;
/* V2DF/V4SF-like cases (labels elided).  */
48885       /* These are special-cased in sse.md so that we can optionally
48886 	 use the vbroadcast instruction.  They expand to two insns
48887 	 if the input happens to be in a register.  */
48888       gcc_unreachable ();
48894       /* These are always implementable using standard shuffle patterns.  */
48895       gcc_unreachable ();
/* V16QI/V8HI case.  */
48899       /* These can be implemented via interleave.  We save one insn by
48900 	 stopping once we have promoted to V4SImode and then use pshufd.  */
/* Widen by self-interleave until V4SImode is reached, picking low or
   high interleave depending on which half `elt' lives in.  */
48906 	  rtx (*gen) (rtx, rtx, rtx)
48907 	    = vmode == V16QImode ? gen_vec_interleave_lowv16qi
48908 				 : gen_vec_interleave_lowv8hi;
48912 	      gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
48913 				       : gen_vec_interleave_highv8hi;
48918 	  dest = gen_reg_rtx (vmode);
48919 	  emit_insn (gen (dest, op0, op0));
48920 	  vmode = get_mode_wider_vector (vmode);
48921 	  op0 = gen_lowpart (vmode, dest);
48923       while (vmode != V4SImode);
/* Final pshufd-style select of the single V4SI element.  */
48925       memset (perm2, elt, 4);
48926       dest = gen_reg_rtx (V4SImode);
48927       ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
48930 	  emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
/* 256-bit integer modes.  */
48938       /* For AVX2 broadcasts of the first element vpbroadcast* or
48939 	 vpermq should be used by expand_vec_perm_1.  */
48940       gcc_assert (!TARGET_AVX2 || d->perm[0]);
48944       gcc_unreachable ();
/* NOTE(review): elided excerpt -- header line, braces, the line
   initializing `elt' and the failure returns are missing from this
   view.  */
48948 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
48949    broadcast permutations.  */
48952 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
48954   unsigned i, elt, nelt = d->nelt;
/* Broadcasts are one-operand by definition.  */
48956   if (!d->one_operand_p)
/* Every index must equal the first one.  */
48960   for (i = 1; i < nelt; ++i)
48961     if (d->perm[i] != elt)
48964   return expand_vec_perm_broadcast_1 (d);
/* NOTE(review): elided excerpt -- the `static bool' line, braces,
   testing_p handling, the even/odd branch condition in the mask loop
   and the assertions after expand_vec_perm_1 are missing from this
   view.  */
48967 /* Implement arbitrary permutations of two V64QImode operands
48968    will 2 vpermi2w, 2 vpshufb and one vpor instruction.  */
48970 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
/* AVX-512BW V64QImode only.  */
48972   if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
48978   struct expand_vec_perm_d ds[2];
48979   rtx rperm[128], vperm, target0, target1;
48980   unsigned int i, nelt;
48981   machine_mode vmode;
/* Two word-granular (V32HI) sub-permutations over the same inputs.  */
48986   for (i = 0; i < 2; i++)
48989       ds[i].vmode = V32HImode;
48991       ds[i].target = gen_reg_rtx (V32HImode);
48992       ds[i].op0 = gen_lowpart (V32HImode, d->op0);
48993       ds[i].op1 = gen_lowpart (V32HImode, d->op1);
48996   /* Prepare permutations such that the first one takes care of
48997      putting the even bytes into the right positions or one higher
48998      positions (ds[0]) and the second one takes care of
48999      putting the odd bytes into the right positions or one below
49002   for (i = 0; i < nelt; i++)
49004       ds[i & 1].perm[i / 2] = d->perm[i] / 2;
/* Byte-select masks for the two following vpshufbs; -1 lanes are
   zeroed (branch condition choosing between the pairs is elided).  */
49007 	  rperm[i] = constm1_rtx;
49008 	  rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49012 	  rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49013 	  rperm[i + 64] = constm1_rtx;
49017   bool ok = expand_vec_perm_1 (&ds[0]);
49019   ds[0].target = gen_lowpart (V64QImode, ds[0].target);
49021   ok = expand_vec_perm_1 (&ds[1]);
49023   ds[1].target = gen_lowpart (V64QImode, ds[1].target);
/* vpshufb each vpermi2w result with its byte mask, then vpor.  */
49025   vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
49026   vperm = force_reg (vmode, vperm);
49027   target0 = gen_reg_rtx (V64QImode);
49028   emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
49030   vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
49031   vperm = force_reg (vmode, vperm);
49032   target1 = gen_reg_rtx (V64QImode);
49033   emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
49035   emit_insn (gen_iorv64qi3 (d->target, target0, target1));
/* NOTE(review): elided excerpt -- header line, TARGET_AVX2 guard,
   braces, testing_p return, the `used[4]' declaration and several
   continue/merge statements are missing from this view.  */
49039 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
49040    with 4 vpshufb insns, 2 vpermq and 3 vpor.  We should have already failed
49041    all the shorter instruction sequences.  */
49044 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
49046   rtx rperm[4][32], vperm, l[2], h[2], op, m128;
49047   unsigned int i, nelt, eltsz;
/* Two-operand V32QI/V16HI only.  */
49051       || d->one_operand_p
49052       || (d->vmode != V32QImode && d->vmode != V16HImode))
49059   eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
49061   /* Generate 4 permutation masks.  If the required element is within
49062      the same lane, it is shuffled in.  If the required element from the
49063      other lane, force a zero by setting bit 7 in the permutation mask.
49064      In the other mask the mask has non-negative elements if element
49065      is requested from the other lane, but also moved to the other lane,
49066      so that the result of vpshufb can have the two V2TImode halves
/* All four masks start fully zeroed (-128 => pshufb writes 0).  */
49068   m128 = GEN_INT (-128);
49069   for (i = 0; i < 32; ++i)
49071       rperm[0][i] = m128;
49072       rperm[1][i] = m128;
49073       rperm[2][i] = m128;
49074       rperm[3][i] = m128;
/* Mask index `which': bit 1 = source operand (op0/op1), bit 0 = the
   element must cross 128-bit lanes.  */
49080   for (i = 0; i < nelt; ++i)
49082       unsigned j, e = d->perm[i] & (nelt / 2 - 1);
49083       unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
49084       unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
49086       for (j = 0; j < eltsz; ++j)
49087 	rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
49088       used[which] = true;
/* Cross-lane pieces: vpshufb into the wrong lane (odd masks)...  */
49091   for (i = 0; i < 2; ++i)
49093       if (!used[2 * i + 1])
49098       vperm = gen_rtx_CONST_VECTOR (V32QImode,
49099 				    gen_rtvec_v (32, rperm[2 * i + 1]));
49100       vperm = force_reg (V32QImode, vperm);
49101       h[i] = gen_reg_rtx (V32QImode);
49102       op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49103       emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
49106   /* Swap the 128-byte lanes of h[X].  */
49107   for (i = 0; i < 2; ++i)
49109       if (h[i] == NULL_RTX)
/* vpermq { 2, 3, 0, 1 } swaps the two 128-bit lanes (last selector
   argument is on an elided line).  */
49111       op = gen_reg_rtx (V4DImode);
49112       emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
49113 				      const2_rtx, GEN_INT (3), const0_rtx,
49115       h[i] = gen_lowpart (V32QImode, op);
/* In-lane pieces: vpshufb with the even masks -> l[i].  */
49118   for (i = 0; i < 2; ++i)
49125       vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
49126       vperm = force_reg (V32QImode, vperm);
49127       l[i] = gen_reg_rtx (V32QImode);
49128       op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49129       emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
/* Combine per-operand in-lane and cross-lane pieces...  */
49132   for (i = 0; i < 2; ++i)
49136 	  op = gen_reg_rtx (V32QImode);
49137 	  emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
/* ...then OR the two operands' contributions together.  */
49144   gcc_assert (l[0] && l[1]);
49146   if (d->vmode != V32QImode)
49147     op = gen_reg_rtx (V32QImode);
49148   emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
49149   if (op != d->target)
49150     emit_move_insn (d->target, gen_lowpart (d->vmode, op));
/* NOTE(review): elided excerpt -- the `static bool' line, braces and
   the final `return false;' are missing from this view.  Each `if'
   below returns true on success (return statements elided).  */
49154 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
49155    With all of the interface bits taken care of, perform the expansion
49156    in D and return true on success.  */
49159 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
49161   /* Try a single instruction expansion.  */
49162   if (expand_vec_perm_1 (d))
49165   /* Try sequences of two instructions.  */
49167   if (expand_vec_perm_pshuflw_pshufhw (d))
49170   if (expand_vec_perm_palignr (d, false))
49173   if (expand_vec_perm_interleave2 (d))
49176   if (expand_vec_perm_broadcast (d))
49179   if (expand_vec_perm_vpermq_perm_1 (d))
49182   if (expand_vec_perm_vperm2f128 (d))
49185   if (expand_vec_perm_pblendv (d))
49188   /* Try sequences of three instructions.  */
49190   if (expand_vec_perm_even_odd_pack (d))
49193   if (expand_vec_perm_2vperm2f128_vshuf (d))
49196   if (expand_vec_perm_pshufb2 (d))
49199   if (expand_vec_perm_interleave3 (d))
49202   if (expand_vec_perm_vperm2f128_vblend (d))
49205   /* Try sequences of four instructions.  */
49207   if (expand_vec_perm_vpshufb2_vpermq (d))
49210   if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
49213   if (expand_vec_perm_vpermi2_vpshub2 (d))
49216   /* ??? Look for narrow permutations whose element orderings would
49217      allow the promotion to a wider mode.  */
49219   /* ??? Look for sequences of interleave or a wider permute that place
49220      the data into the correct lanes for a half-vector shuffle like
49221      pshuf[lh]w or vpermilps.  */
49223   /* ??? Look for sequences of interleave that produce the desired results.
49224      The combinatorics of punpck[lh] get pretty ugly... */
49226   if (expand_vec_perm_even_odd (d))
49229   /* Even longer sequences.  */
49230   if (expand_vec_perm_vpshufb4_vpermq2 (d))
/* NOTE(review): elided excerpt -- header line, braces, the switch on
   `which' and several case bodies (including the op0 = op1 folding)
   are missing from this view.  */
49236 /* If a permutation only uses one operand, make it clear.  Returns true
49237    if the permutation references both operands.  */
49240 canonicalize_perm (struct expand_vec_perm_d *d)
49242   int i, which, nelt = d->nelt;
/* which: bit 0 set if any index refers to op0, bit 1 if any refers
   to op1.  */
49244   for (i = which = 0; i < nelt; ++i)
49245     which |= (d->perm[i] < nelt ? 1 : 2);
49247   d->one_operand_p = true;
/* which == 3 but identical operands: treat as one-operand.  */
49254       if (!rtx_equal_p (d->op0, d->op1))
49256 	  d->one_operand_p = false;
49259       /* The elements of PERM do not suggest that only the first operand
49260 	 is used, but both operands are identical.  Allow easier matching
49261 	 of the permutation by folding the permutation into the single
/* Fold all indices into the 0..nelt-1 range for the single operand.  */
49266       for (i = 0; i < nelt; ++i)
49267 	d->perm[i] &= nelt - 1;
49276   return (which == 3);
/* NOTE(review): elided excerpt -- return type line, braces, the `sel'
   assignment from operands[3], the perm[] save and the success returns
   are missing from this view.  Expands a constant permutation given
   operands { target, op0, op1, selector }.  */
49280 ix86_expand_vec_perm_const (rtx operands[4])
49282   struct expand_vec_perm_d d;
49283   unsigned char perm[MAX_VECT_LEN];
49288   d.target = operands[0];
49289   d.op0 = operands[1];
49290   d.op1 = operands[2];
49293   d.vmode = GET_MODE (d.target);
49294   gcc_assert (VECTOR_MODE_P (d.vmode));
49295   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49296   d.testing_p = false;
49298   gcc_assert (GET_CODE (sel) == CONST_VECTOR);
49299   gcc_assert (XVECLEN (sel, 0) == nelt);
49300   gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
/* Extract and wrap each selector element into 0 .. 2*nelt-1.  */
49302   for (i = 0; i < nelt; ++i)
49304       rtx e = XVECEXP (sel, 0, i);
49305       int ei = INTVAL (e) & (2 * nelt - 1);
49310   two_args = canonicalize_perm (&d);
49312   if (ix86_expand_vec_perm_const_1 (&d))
49315   /* If the selector says both arguments are needed, but the operands are the
49316      same, the above tried to expand with one_operand_p and flattened selector.
49317      If that didn't work, retry without one_operand_p; we succeeded with that
/* Retry with the unflattened selector saved in perm[].  */
49319   if (two_args && d.one_operand_p)
49321       d.one_operand_p = false;
49322       memcpy (d.perm, perm, sizeof (perm));
49323       return ix86_expand_vec_perm_const_1 (&d);
/* NOTE(review): elided excerpt -- return type line, braces, the switch
   on vmode with its case labels, the `return true' statements and the
   start/end sequence around the trial expansion are missing from this
   view.  */
49329 /* Implement targetm.vectorize.vec_perm_const_ok.  */
49332 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
49333 				  const unsigned char *sel)
49335   struct expand_vec_perm_d d;
49336   unsigned int i, nelt, which;
49340   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49341   d.testing_p = true;
49343   /* Given sufficient ISA support we can just return true here
49344      for selected vector modes.  */
/* 512-bit modes (case labels elided).  */
49351       if (TARGET_AVX512F)
49352 	/* All implementable with a single vpermi2 insn.  */
49356       if (TARGET_AVX512BW)
49357 	/* All implementable with a single vpermi2 insn.  */
49361       if (TARGET_AVX512BW)
49362 	/* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn.  */
/* 128/256-bit modes with AVX-512VL.  */
49369       if (TARGET_AVX512VL)
49370 	/* All implementable with a single vpermi2 insn.  */
49375       /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns.  */
49380       /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns.  */
49387       /* All implementable with a single vpperm insn.  */
49390       /* All implementable with 2 pshufb + 1 ior.  */
49396       /* All implementable with shufpd or unpck[lh]pd.  */
49402   /* Extract the values from the vector CST into the permutation
49404   memcpy (d.perm, sel, nelt);
49405   for (i = which = 0; i < nelt; ++i)
49407       unsigned char e = d.perm[i];
49408       gcc_assert (e < 2 * nelt);
49409       which |= (e < nelt ? 1 : 2);
49412   /* For all elements from second vector, fold the elements to first.  */
49414     for (i = 0; i < nelt; ++i)
49417   /* Check whether the mask can be applied to the vector type.  */
49418   d.one_operand_p = (which != 3);
49420   /* Implementable with shufps or pshufd.  */
49421   if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49424   /* Otherwise we have to go through the motions and see if we can
49425      figure out how to generate the requested permutation.  */
/* Raw virtual registers: a dry run only, no insns are kept.  */
49426   d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49427   d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49428   if (!d.one_operand_p)
49429     d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49432   ret = ix86_expand_vec_perm_const_1 (&d);
/* NOTE(review): elided excerpt -- return type line, braces and the
   op0/op1 assignments into `d' are missing from this view.  Emits an
   extract-even (odd == 0) or extract-odd (odd == 1) permutation of
   OP0/OP1 into TARG.  */
49439 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49441   struct expand_vec_perm_d d;
49447   d.vmode = GET_MODE (targ);
49448   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49449   d.one_operand_p = false;
49450   d.testing_p = false;
49452   for (i = 0; i < nelt; ++i)
49453     d.perm[i] = i * 2 + odd;
49455   /* We'll either be able to implement the permutation directly...  */
49456   if (expand_vec_perm_1 (&d))
49459   /* ... or we use the special-case patterns.  */
49460   expand_vec_perm_even_odd_1 (&d, odd);
/* NOTE(review): elided excerpt -- return type line, braces, the
   op0/op1 assignments and the gcc_assert on `ok' are missing from this
   view.  Emits an interleave of the high (high_p) or low halves of
   OP0/OP1 into TARG.  */
49464 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49466   struct expand_vec_perm_d d;
49467   unsigned i, nelt, base;
49473   d.vmode = GET_MODE (targ);
49474   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49475   d.one_operand_p = false;
49476   d.testing_p = false;
/* Interleave pattern: { base, base+nelt, base+1, base+nelt+1, ... }.  */
49478   base = high_p ? nelt / 2 : 0;
49479   for (i = 0; i < nelt / 2; ++i)
49481       d.perm[i * 2] = i + base;
49482       d.perm[i * 2 + 1] = i + base + nelt;
49485   /* Note that for AVX this isn't one instruction.  */
49486   ok = ix86_expand_vec_perm_const_1 (&d);
/* NOTE(review): elided excerpt -- return type line, braces, the switch
   labels on qimode/code, the shift special-case and some perm index
   assignments are missing from this view.  */
49491 /* Expand a vector operation CODE for a V*QImode in terms of the
49492    same operation on V*HImode.  */
49495 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49497   machine_mode qimode = GET_MODE (dest);
49498   machine_mode himode;
/* Low/high interleave expanders for the byte mode.  */
49499   rtx (*gen_il) (rtx, rtx, rtx);
49500   rtx (*gen_ih) (rtx, rtx, rtx);
49501   rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49502   struct expand_vec_perm_d d;
49503   bool ok, full_interleave;
49504   bool uns_p = false;
/* Per-mode interleave selection (V16QI/V32QI/V64QI labels elided).  */
49511       gen_il = gen_vec_interleave_lowv16qi;
49512       gen_ih = gen_vec_interleave_highv16qi;
49515       himode = V16HImode;
49516       gen_il = gen_avx2_interleave_lowv32qi;
49517       gen_ih = gen_avx2_interleave_highv32qi;
49520       himode = V32HImode;
49521       gen_il = gen_avx512bw_interleave_lowv64qi;
49522       gen_ih = gen_avx512bw_interleave_highv64qi;
49525       gcc_unreachable ();
/* MULT path (case label elided): unpack both operands.  */
49528       op2_l = op2_h = op2;
49532       /* Unpack data such that we've got a source byte in each low byte of
49533 	 each word.  We don't care what goes into the high byte of each word.
49534 	 Rather than trying to get zero in there, most convenient is to let
49535 	 it be a copy of the low byte.  */
49536       op2_l = gen_reg_rtx (qimode);
49537       op2_h = gen_reg_rtx (qimode);
49538       emit_insn (gen_il (op2_l, op2, op2));
49539       emit_insn (gen_ih (op2_h, op2, op2));
49542       op1_l = gen_reg_rtx (qimode);
49543       op1_h = gen_reg_rtx (qimode);
49544       emit_insn (gen_il (op1_l, op1, op1));
49545       emit_insn (gen_ih (op1_h, op1, op1));
49546       full_interleave = qimode == V16QImode;
/* Shift path: sign/zero-extend unpack instead.  */
49554       op1_l = gen_reg_rtx (himode);
49555       op1_h = gen_reg_rtx (himode);
49556       ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
49557       ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
49558       full_interleave = true;
49561       gcc_unreachable ();
49564   /* Perform the operation.  */
49565   res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
49567   res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
49569   gcc_assert (res_l && res_h);
49571   /* Merge the data back into the right place.  */
49573   d.op0 = gen_lowpart (qimode, res_l);
49574   d.op1 = gen_lowpart (qimode, res_h);
49576   d.nelt = GET_MODE_NUNITS (qimode);
49577   d.one_operand_p = false;
49578   d.testing_p = false;
49580   if (full_interleave)
49582       /* For SSE2, we used an full interleave, so the desired
49583 	 results are in the even elements.  */
49584       for (i = 0; i < 64; ++i)
49589       /* For AVX, the interleave used above was not cross-lane.  So the
49590 	 extraction is evens but with the second and third quarter swapped.
49591 	 Happily, that is even one insn shorter than even extraction.  */
49592       for (i = 0; i < 64; ++i)
49593 	d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
49596   ok = ix86_expand_vec_perm_const_1 (&d);
/* Record the operation for later RTL simplification.  */
49599   set_unique_reg_note (get_last_insn (), REG_EQUAL,
49600 		       gen_rtx_fmt_ee (code, qimode, op1, op2));
49603 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
49604 if op is CONST_VECTOR with all odd elements equal to their
49605 preceding element.  Such a constant needs no even/odd shuffling
      before a widening even-multiply.  NOTE(review): the return
      statements are missing from this line-sampled excerpt.  */
49608 const_vector_equal_evenodd_p (rtx op)
49610 machine_mode mode = GET_MODE (op);
49611 int i, nunits = GET_MODE_NUNITS (mode);
49612 if (GET_CODE (op) != CONST_VECTOR
49613 || nunits != CONST_VECTOR_NUNITS (op))
/* Compare each even element with its odd successor.  */
49615 for (i = 0; i < nunits; i += 2)
49616 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
/* Expand a widening multiply of the even (or, when ODD_P, odd) SImode
   elements of OP1 and OP2 into the DImode-element vector DEST.  UNS_P
   selects unsigned vs. signed widening.  NOTE(review): excerpt is
   line-sampled; some braces, else-arms and emits are missing from view.  */
49622 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
49623 bool uns_p, bool odd_p)
49625 machine_mode mode = GET_MODE (op1);
49626 machine_mode wmode = GET_MODE (dest);
49628 rtx orig_op1 = op1, orig_op2 = op2;
49630 if (!nonimmediate_operand (op1, mode))
49631 op1 = force_reg (mode, op1);
49632 if (!nonimmediate_operand (op2, mode))
49633 op2 = force_reg (mode, op2);
49635 /* We only play even/odd games with vectors of SImode. */
49636 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
49638 /* If we're looking for the odd results, shift those members down to
49639 the even slots. For some cpus this is faster than a PSHUFD. */
49642 /* For XOP use vpmacsdqh, but only for smult, as it is only
49644 if (TARGET_XOP && mode == V4SImode && !uns_p)
49646 x = force_reg (wmode, CONST0_RTX (wmode));
49647 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
/* Logical right shift by one element's width moves odd lanes into the
   even slots; skipped for constants already even/odd-equal.  */
49651 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
49652 if (!const_vector_equal_evenodd_p (orig_op1))
49653 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
49654 x, NULL, 1, OPTAB_DIRECT);
49655 if (!const_vector_equal_evenodd_p (orig_op2))
49656 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
49657 x, NULL, 1, OPTAB_DIRECT);
49658 op1 = gen_lowpart (mode, op1);
49659 op2 = gen_lowpart (mode, op2);
/* Dispatch on vector width; the signed even-multiply falls back to a
   manual sequence below when no PMULDQ-style insn is available.  */
49662 if (mode == V16SImode)
49665 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
49667 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
49669 else if (mode == V8SImode)
49672 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
49674 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
49677 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
49678 else if (TARGET_SSE4_1)
49679 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
49682 rtx s1, s2, t0, t1, t2;
49684 /* The easiest way to implement this without PMULDQ is to go through
49685 the motions as if we are performing a full 64-bit multiply. With
49686 the exception that we need to do less shuffling of the elements. */
49688 /* Compute the sign-extension, aka highparts, of the two operands. */
49689 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49690 op1, pc_rtx, pc_rtx);
49691 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49692 op2, pc_rtx, pc_rtx);
49694 /* Multiply LO(A) * HI(B), and vice-versa. */
49695 t1 = gen_reg_rtx (wmode);
49696 t2 = gen_reg_rtx (wmode);
49697 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
49698 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
49700 /* Multiply LO(A) * LO(B). */
49701 t0 = gen_reg_rtx (wmode);
49702 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
49704 /* Combine and shift the highparts into place. */
49705 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
49706 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
49709 /* Combine high and low parts. */
49710 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
/* Expand a widening multiply of the low (or, when HIGH_P, high) half of
   OP1 and OP2 into DEST, keeping element order.  UNS_P selects
   signedness.  Rearranges elements so the even/odd widening multiply
   produces in-order results.  NOTE(review): excerpt is line-sampled;
   switch/case structure and some braces are missing from view.  */
49717 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
49718 bool uns_p, bool high_p)
49720 machine_mode wmode = GET_MODE (dest);
49721 machine_mode mode = GET_MODE (op1);
49722 rtx t1, t2, t3, t4, mask;
49727 t1 = gen_reg_rtx (mode);
49728 t2 = gen_reg_rtx (mode);
49729 if (TARGET_XOP && !uns_p)
49731 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
49732 shuffle the elements once so that all elements are in the right
49733 place for immediate use: { A C B D }. */
49734 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
49735 const1_rtx, GEN_INT (3)));
49736 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
49737 const1_rtx, GEN_INT (3)));
49741 /* Put the elements into place for the multiply. */
49742 ix86_expand_vec_interleave (t1, op1, op1, high_p);
49743 ix86_expand_vec_interleave (t2, op2, op2, high_p);
49746 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
49750 /* Shuffle the elements between the lanes. After this we
49751 have { A B E F | C D G H } for each operand. */
49752 t1 = gen_reg_rtx (V4DImode);
49753 t2 = gen_reg_rtx (V4DImode);
49754 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
49755 const0_rtx, const2_rtx,
49756 const1_rtx, GEN_INT (3)));
49757 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
49758 const0_rtx, const2_rtx,
49759 const1_rtx, GEN_INT (3)));
49761 /* Shuffle the elements within the lanes. After this we
49762 have { A A B B | C C D D } or { E E F F | G G H H }. */
49763 t3 = gen_reg_rtx (V8SImode);
49764 t4 = gen_reg_rtx (V8SImode);
/* PSHUFD immediate selecting either the high pair (2,2,3,3) or the
   low pair (0,0,1,1) of each lane.  */
49765 mask = GEN_INT (high_p
49766 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
49767 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
49768 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
49769 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
49771 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
/* Fallback: compute low and high products separately via the *_highpart
   optabs, then interleave them back together.  */
49776 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
49777 uns_p, OPTAB_DIRECT);
49778 t2 = expand_binop (mode,
49779 uns_p ? umul_highpart_optab : smul_highpart_optab,
49780 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
49781 gcc_assert (t1 && t2);
49783 t3 = gen_reg_rtx (mode);
49784 ix86_expand_vec_interleave (t3, t1, t2, high_p);
49785 emit_move_insn (dest, gen_lowpart (wmode, t3));
/* Or: widen both operands via unpack, then multiply in the wide mode.  */
49793 t1 = gen_reg_rtx (wmode);
49794 t2 = gen_reg_rtx (wmode);
49795 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
49796 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
49798 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
49802 gcc_unreachable ();
/* Expand a V4SImode multiply OP0 = OP1 * OP2 using only SSE2, by
   combining two widening even/odd multiplies and interleaving the low
   halves back together.  NOTE(review): excerpt is line-sampled.  */
49807 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
49809 rtx res_1, res_2, res_3, res_4;
49811 res_1 = gen_reg_rtx (V4SImode);
49812 res_2 = gen_reg_rtx (V4SImode);
49813 res_3 = gen_reg_rtx (V2DImode);
49814 res_4 = gen_reg_rtx (V2DImode);
49815 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
49816 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
49818 /* Move the results in element 2 down to element 1; we don't care
49819 what goes in elements 2 and 3. Then we can merge the parts
49820 back together with an interleave.
49822 Note that two other sequences were tried:
49823 (1) Use interleaves at the start instead of psrldq, which allows
49824 us to use a single shufps to merge things back at the end.
49825 (2) Use shufps here to combine the two vectors, then pshufd to
49826 put the elements in the correct order.
49827 In both cases the cost of the reformatting stall was too high
49828 and the overall sequence slower. */
49830 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
49831 const0_rtx, const2_rtx,
49832 const0_rtx, const0_rtx));
49833 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
49834 const0_rtx, const2_rtx,
49835 const0_rtx, const0_rtx));
49836 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
/* REG_EQUAL note lets later passes treat the sequence as one MULT.  */
49838 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
/* Expand a V*DImode multiply OP0 = OP1 * OP2.  Uses AVX-512DQ vpmullq
   or XOP sequences when available, otherwise synthesizes the 64-bit
   product from 32x32->64 widening multiplies of high and low parts.
   NOTE(review): excerpt is line-sampled; some braces, pshufd operands
   and the nmode assignments are missing from view.  */
49842 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
49844 machine_mode mode = GET_MODE (op0);
49845 rtx t1, t2, t3, t4, t5, t6;
49847 if (TARGET_AVX512DQ && mode == V8DImode)
49848 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
49849 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
49850 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
49851 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
49852 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
49853 else if (TARGET_XOP && mode == V2DImode)
49855 /* op1: A,B,C,D, op2: E,F,G,H */
49856 op1 = gen_lowpart (V4SImode, op1);
49857 op2 = gen_lowpart (V4SImode, op2);
49859 t1 = gen_reg_rtx (V4SImode);
49860 t2 = gen_reg_rtx (V4SImode);
49861 t3 = gen_reg_rtx (V2DImode);
49862 t4 = gen_reg_rtx (V2DImode);
49865 emit_insn (gen_sse2_pshufd_1 (t1, op1,
49871 /* t2: (B*E),(A*F),(D*G),(C*H) */
49872 emit_insn (gen_mulv4si3 (t2, t1, op2));
49874 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
49875 emit_insn (gen_xop_phadddq (t3, t2));
49877 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
49878 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
49880 /* Multiply lower parts and add all */
49881 t5 = gen_reg_rtx (V2DImode);
49882 emit_insn (gen_vec_widen_umult_even_v4si (t5,
49883 gen_lowpart (V4SImode, op1),
49884 gen_lowpart (V4SImode, op2)));
49885 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
/* Generic path: pick the widening even-umult generator matching the
   vector width.  */
49890 machine_mode nmode;
49891 rtx (*umul) (rtx, rtx, rtx);
49893 if (mode == V2DImode)
49895 umul = gen_vec_widen_umult_even_v4si;
49898 else if (mode == V4DImode)
49900 umul = gen_vec_widen_umult_even_v8si;
49903 else if (mode == V8DImode)
49905 umul = gen_vec_widen_umult_even_v16si;
49909 gcc_unreachable ();
49912 /* Multiply low parts. */
49913 t1 = gen_reg_rtx (mode);
49914 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
49916 /* Shift input vectors right 32 bits so we can multiply high parts. */
49918 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
49919 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
49921 /* Multiply high parts by low parts. */
49922 t4 = gen_reg_rtx (mode);
49923 t5 = gen_reg_rtx (mode);
49924 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
49925 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
49927 /* Combine and shift the highparts back. */
49928 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
49929 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
49931 /* Combine high and low parts. */
49932 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
49935 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49936 gen_rtx_MULT (mode, op1, op2));
49939 /* Return 1 if control transfer instruction INSN
49940 should be encoded with bnd prefix.
49941 If insn is NULL then return 1 when control
49942 transfer instructions should be prefixed with
49943 bnd by default for current function.  (MPX pointer-bounds
      checking support.)  */
49946 ix86_bnd_prefixed_insn_p (rtx insn)
49948 /* For call insns check special flag. */
49949 if (insn && CALL_P (insn))
49951 rtx call = get_call_rtx_from (insn);
49953 return CALL_EXPR_WITH_BOUNDS_P (call);
49956 /* All other insns are prefixed only if function is instrumented. */
49957 return chkp_function_instrumented_p (current_function_decl);
49960 /* Calculate integer abs() using only SSE2 instructions.
      Strategy depends on element width: arithmetic-shift trick for
      32-bit, PMAXSW for 16-bit, PMINUB for 8-bit elements.
      NOTE(review): excerpt is line-sampled; the switch/case framing
      is missing from view.  */
49963 ix86_expand_sse2_abs (rtx target, rtx input)
49965 machine_mode mode = GET_MODE (target);
49970 /* For 32-bit signed integer X, the best way to calculate the absolute
49971 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
49973 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
49974 GEN_INT (GET_MODE_BITSIZE
49975 (GET_MODE_INNER (mode)) - 1),
49976 NULL, 0, OPTAB_DIRECT);
49977 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
49978 NULL, 0, OPTAB_DIRECT);
49979 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
49980 target, 0, OPTAB_DIRECT);
49983 /* For 16-bit signed integer X, the best way to calculate the absolute
49984 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
49986 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
49988 x = expand_simple_binop (mode, SMAX, tmp0, input,
49989 target, 0, OPTAB_DIRECT);
49992 /* For 8-bit signed integer X, the best way to calculate the absolute
49993 value of X is min ((unsigned char) X, (unsigned char) (-X)),
49994 as SSE2 provides the PMINUB insn. */
49996 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
49998 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
49999 target, 0, OPTAB_DIRECT);
50003 gcc_unreachable ();
/* The expanders above may have produced the result elsewhere.  */
50007 emit_move_insn (target, x);
50010 /* Expand an insert into a vector register through pinsr insn.
50011 Return true if successful.  OPERANDS: [0] destination vector,
      [1] bit size of the inserted field, [2] bit position, [3] source.
      NOTE(review): excerpt is line-sampled; case labels, failure
      returns and some braces are missing from view.  */
50014 ix86_expand_pinsr (rtx *operands)
50016 rtx dst = operands[0];
50017 rtx src = operands[3];
50019 unsigned int size = INTVAL (operands[1]);
50020 unsigned int pos = INTVAL (operands[2]);
/* Strip a SUBREG on the destination, folding its byte offset into the
   bit position.  */
50022 if (GET_CODE (dst) == SUBREG)
50024 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
50025 dst = SUBREG_REG (dst);
50028 if (GET_CODE (src) == SUBREG)
50029 src = SUBREG_REG (src);
50031 switch (GET_MODE (dst))
50038 machine_mode srcmode, dstmode;
50039 rtx (*pinsr)(rtx, rtx, rtx, rtx);
50041 srcmode = mode_for_size (size, MODE_INT, 0);
/* Select the pinsr variant for the element width; byte/dword/qword
   forms need SSE4.1, word form only SSE2.  */
50046 if (!TARGET_SSE4_1)
50048 dstmode = V16QImode;
50049 pinsr = gen_sse4_1_pinsrb;
50055 dstmode = V8HImode;
50056 pinsr = gen_sse2_pinsrw;
50060 if (!TARGET_SSE4_1)
50062 dstmode = V4SImode;
50063 pinsr = gen_sse4_1_pinsrd;
/* pinsrq only exists in 64-bit mode.  */
50067 gcc_assert (TARGET_64BIT);
50068 if (!TARGET_SSE4_1)
50070 dstmode = V2DImode;
50071 pinsr = gen_sse4_1_pinsrq;
50079 if (GET_MODE (dst) != dstmode)
50080 d = gen_reg_rtx (dstmode);
50081 src = gen_lowpart (srcmode, src);
/* The pinsr immediate is a one-hot element selector.  */
50085 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
50086 GEN_INT (1 << pos)));
50088 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
50097 /* This function returns the calling abi specific va_list type node.
50098 It returns the FNDECL specific va_list type: the MS-ABI va_list for
      MS_ABI functions, otherwise the SysV one.  NOTE(review): the
      guard before the first return is missing from this line-sampled
      excerpt.  */
50101 ix86_fn_abi_va_list (tree fndecl)
50104 return va_list_type_node;
50105 gcc_assert (fndecl != NULL_TREE);
50107 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
50108 return ms_va_list_type_node;
50110 return sysv_va_list_type_node;
50113 /* Returns the canonical va_list type specified by TYPE. If there
50114 is no valid TYPE provided, it return NULL_TREE.  Tries, in order,
      the default va_list, the SysV va_list and the MS va_list node,
      comparing main variants (after unwrapping array decay).  */
50117 ix86_canonical_va_list_type (tree type)
50121 /* Resolve references and pointers to va_list type. */
50122 if (TREE_CODE (type) == MEM_REF)
50123 type = TREE_TYPE (type);
50124 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
50125 type = TREE_TYPE (type);
50126 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
50127 type = TREE_TYPE (type);
50129 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
50131 wtype = va_list_type_node;
50132 gcc_assert (wtype != NULL_TREE);
50134 if (TREE_CODE (wtype) == ARRAY_TYPE)
50136 /* If va_list is an array type, the argument may have decayed
50137 to a pointer type, e.g. by being passed to another function.
50138 In that case, unwrap both types so that we can compare the
50139 underlying records. */
50140 if (TREE_CODE (htype) == ARRAY_TYPE
50141 || POINTER_TYPE_P (htype))
50143 wtype = TREE_TYPE (wtype);
50144 htype = TREE_TYPE (htype);
50147 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50148 return va_list_type_node;
/* Second candidate: the SysV va_list.  */
50149 wtype = sysv_va_list_type_node;
50150 gcc_assert (wtype != NULL_TREE);
50152 if (TREE_CODE (wtype) == ARRAY_TYPE)
50154 /* If va_list is an array type, the argument may have decayed
50155 to a pointer type, e.g. by being passed to another function.
50156 In that case, unwrap both types so that we can compare the
50157 underlying records. */
50158 if (TREE_CODE (htype) == ARRAY_TYPE
50159 || POINTER_TYPE_P (htype))
50161 wtype = TREE_TYPE (wtype);
50162 htype = TREE_TYPE (htype);
50165 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50166 return sysv_va_list_type_node;
/* Third candidate: the MS va_list.  */
50167 wtype = ms_va_list_type_node;
50168 gcc_assert (wtype != NULL_TREE);
50170 if (TREE_CODE (wtype) == ARRAY_TYPE)
50172 /* If va_list is an array type, the argument may have decayed
50173 to a pointer type, e.g. by being passed to another function.
50174 In that case, unwrap both types so that we can compare the
50175 underlying records. */
50176 if (TREE_CODE (htype) == ARRAY_TYPE
50177 || POINTER_TYPE_P (htype))
50179 wtype = TREE_TYPE (wtype);
50180 htype = TREE_TYPE (htype);
50183 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50184 return ms_va_list_type_node;
/* No target-specific match: fall back to the generic handling.  */
50187 return std_canonical_va_list_type (type);
50190 /* Iterate through the target-specific builtin types for va_list.
50191 IDX denotes the iterator, *PTREE is set to the result type of
50192 the va_list builtin, and *PNAME to its internal type.
50193 Returns zero if there is no element for this index, otherwise
50194 IDX should be increased upon the next call.
50195 Note, do not iterate a base builtin's name like __builtin_va_list.
50196 Used from c_common_nodes_and_builtins.  NOTE(review): the switch on
      IDX and the return statements are missing from this line-sampled
      excerpt.  */
50199 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
50209 *ptree = ms_va_list_type_node;
50210 *pname = "__builtin_ms_va_list";
50214 *ptree = sysv_va_list_type_node;
50215 *pname = "__builtin_sysv_va_list";
/* Scheduler target hooks for the AMD dispatch-window scheduling support
   defined below (has_dispatch / do_dispatch and friends).  */
50223 #undef TARGET_SCHED_DISPATCH
50224 #define TARGET_SCHED_DISPATCH has_dispatch
50225 #undef TARGET_SCHED_DISPATCH_DO
50226 #define TARGET_SCHED_DISPATCH_DO do_dispatch
50227 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50228 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50229 #undef TARGET_SCHED_REORDER
50230 #define TARGET_SCHED_REORDER ix86_sched_reorder
50231 #undef TARGET_SCHED_ADJUST_PRIORITY
50232 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50233 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50234 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50235 ix86_dependencies_evaluation_hook
50237 /* The size of the dispatch window is the total number of bytes of
50238 object code allowed in a window. */
50239 #define DISPATCH_WINDOW_SIZE 16
50241 /* Number of dispatch windows considered for scheduling. */
50242 #define MAX_DISPATCH_WINDOWS 3
50244 /* Maximum number of instructions in a window. */
50247 /* Maximum number of immediate operands in a window. */
50250 /* Maximum number of immediate bits allowed in a window. */
50251 #define MAX_IMM_SIZE 128
50253 /* Maximum number of 32 bit immediates allowed in a window. */
50254 #define MAX_IMM_32 4
50256 /* Maximum number of 64 bit immediates allowed in a window. */
50257 #define MAX_IMM_64 2
50259 /* Maximum total of loads or prefetches allowed in a window. */
50262 /* Maximum total of stores allowed in a window. */
50263 #define MAX_STORE 1
50269 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
50270 enum dispatch_group {
50285 /* Number of allowable groups in a dispatch window. It is an array
50286 indexed by dispatch_group enum. 100 is used as a big number,
50287 because the number of these kind of operations does not have any
50288 effect in dispatch window, but we need them for other reasons in
50290 static unsigned int num_allowable_groups[disp_last] = {
50291 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
/* Human-readable names for dispatch groups (used for debug dumps);
   must stay in sync with enum dispatch_group.  */
50294 char group_name[disp_last + 1][16] = {
50295 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50296 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50297 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50300 /* Instruction path: how many micro-ops an insn decodes into. */
50303 path_single, /* Single micro op. */
50304 path_double, /* Double micro op. */
50305 path_multi, /* Instructions with more than 2 micro op.. */
50309 /* sched_insn_info defines a window to the instructions scheduled in
50310 the basic block. It contains a pointer to the insn_info table and
50311 the instruction scheduled.
50313 Windows are allocated for each basic block and are linked
50315 typedef struct sched_insn_info_s {
50317 enum dispatch_group group;
50318 enum insn_path path;
50323 /* Linked list of dispatch windows. This is a two way list of
50324 dispatch windows of a basic block. It contains information about
50325 the number of uops in the window and the total number of
50326 instructions and of bytes in the object code for this dispatch
50328 typedef struct dispatch_windows_s {
50329 int num_insn; /* Number of insn in the window. */
50330 int num_uops; /* Number of uops in the window. */
50331 int window_size; /* Number of bytes in the window. */
50332 int window_num; /* Window number between 0 or 1. */
50333 int num_imm; /* Number of immediates in an insn. */
50334 int num_imm_32; /* Number of 32 bit immediates in an insn. */
50335 int num_imm_64; /* Number of 64 bit immediates in an insn. */
50336 int imm_size; /* Total immediates in the window. */
50337 int num_loads; /* Total memory loads in the window. */
50338 int num_stores; /* Total memory stores in the window. */
50339 int violation; /* Violation exists in window. */
50340 sched_insn_info *window; /* Pointer to the window. */
50341 struct dispatch_windows_s *next;
50342 struct dispatch_windows_s *prev;
50343 } dispatch_windows;
50345 /* Immediate values used in an insn. */
50346 typedef struct imm_info_s
/* Heads of the two dispatch-window lists (window 0 and window 1).  */
50353 static dispatch_windows *dispatch_window_list;
50354 static dispatch_windows *dispatch_window_list1;
50356 /* Get dispatch group of insn: classify INSN by its memory attribute
      (load / store / both), defaulting to disp_no_group.  */
50358 static enum dispatch_group
50359 get_mem_group (rtx_insn *insn)
50361 enum attr_memory memory;
/* Unrecognized insns have no attributes to query.  */
50363 if (INSN_CODE (insn) < 0)
50364 return disp_no_group;
50365 memory = get_attr_memory (insn);
50366 if (memory == MEMORY_STORE)
50369 if (memory == MEMORY_LOAD)
50372 if (memory == MEMORY_BOTH)
50373 return disp_load_store;
50375 return disp_no_group;
50378 /* Return true if insn is a compare instruction (by type attribute or
      a COMPARE pattern).  */
50381 is_cmp (rtx_insn *insn)
50383 enum attr_type type;
50385 type = get_attr_type (insn);
50386 return (type == TYPE_TEST
50387 || type == TYPE_ICMP
50388 || type == TYPE_FCMP
50389 || GET_CODE (PATTERN (insn)) == COMPARE);
50392 /* Return true if a dispatch violation was encountered in the last
      (most recent) dispatch window.  */
50395 dispatch_violation (void)
50397 if (dispatch_window_list->next)
50398 return dispatch_window_list->next->violation;
50399 return dispatch_window_list->violation;
50402 /* Return true if insn is a branch instruction (call or jump). */
50405 is_branch (rtx insn)
50407 return (CALL_P (insn) || JUMP_P (insn));
50410 /* Return true if insn is a prefetch instruction. */
50413 is_prefetch (rtx insn)
50415 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
50418 /* This function initializes a dispatch window and the list container holding a
50419 pointer to the window.  WINDOW_NUM selects window 0 or window 1;
      all counters are reset and each slot cleared.  */
50422 init_window (int window_num)
50425 dispatch_windows *new_list;
50427 if (window_num == 0)
50428 new_list = dispatch_window_list;
50430 new_list = dispatch_window_list1;
50432 new_list->num_insn = 0;
50433 new_list->num_uops = 0;
50434 new_list->window_size = 0;
50435 new_list->next = NULL;
50436 new_list->prev = NULL;
50437 new_list->window_num = window_num;
50438 new_list->num_imm = 0;
50439 new_list->num_imm_32 = 0;
50440 new_list->num_imm_64 = 0;
50441 new_list->imm_size = 0;
50442 new_list->num_loads = 0;
50443 new_list->num_stores = 0;
50444 new_list->violation = false;
/* Clear every instruction slot in the window.  */
50446 for (i = 0; i < MAX_INSN; i++)
50448 new_list->window[i].insn = NULL;
50449 new_list->window[i].group = disp_no_group;
50450 new_list->window[i].path = no_path;
50451 new_list->window[i].byte_len = 0;
50452 new_list->window[i].imm_bytes = 0;
50457 /* This function allocates and initializes a dispatch window and the
50458 list container holding a pointer to the window.  The slot array has
      MAX_INSN + 1 entries.  */
50460 static dispatch_windows *
50461 allocate_window (void)
50463 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
50464 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
50469 /* This routine initializes the dispatch scheduling information. It
50470 initiates building dispatch scheduler tables and constructs the
50471 first dispatch window. */
50474 init_dispatch_sched (void)
50476 /* Allocate a dispatch list and a window. */
50477 dispatch_window_list = allocate_window ();
50478 dispatch_window_list1 = allocate_window ();
50483 /* This function returns true if a branch is detected. End of a basic block
50484 does not have to be a branch, but here we assume only branches end a
      window.  */
50488 is_end_basic_block (enum dispatch_group group)
50490 return group == disp_branch;
50493 /* This function is called when the end of a window processing is reached.
      Sanity-checks the insn counts and the 48-byte two-window limit.  */
50496 process_end_window (void)
50498 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
50499 if (dispatch_window_list->next)
50501 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
/* Two chained windows together may cover at most 48 bytes.  */
50502 gcc_assert (dispatch_window_list->window_size
50503 + dispatch_window_list1->window_size <= 48);
50509 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
50510 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
50511 for 48 bytes of instructions. Note that these windows are not dispatch
50512 windows that their sizes are DISPATCH_WINDOW_SIZE. */
50514 static dispatch_windows *
50515 allocate_next_window (int window_num)
50517 if (window_num == 0)
50519 if (dispatch_window_list->next)
50522 return dispatch_window_list;
/* Chain window 1 after window 0.  */
50525 dispatch_window_list->next = dispatch_window_list1;
50526 dispatch_window_list1->prev = dispatch_window_list;
50528 return dispatch_window_list1;
50531 /* Compute number of immediate operands of an instruction, walking every
      sub-rtx of IN_RTX and tallying counts into IMM_VALUES.
      NOTE(review): case labels of the switch are missing from this
      line-sampled excerpt.  */
50534 find_constant (rtx in_rtx, imm_info *imm_values)
50536 if (INSN_P (in_rtx))
50537 in_rtx = PATTERN (in_rtx);
50538 subrtx_iterator::array_type array;
50539 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
50540 if (const_rtx x = *iter)
50541 switch (GET_CODE (x))
/* Integer constant: 32-bit if it fits SImode, else 64-bit.  */
50546 (imm_values->imm)++;
50547 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
50548 (imm_values->imm32)++;
50550 (imm_values->imm64)++;
50554 (imm_values->imm)++;
50555 (imm_values->imm64)++;
/* Normal label references count as 32-bit immediates.  */
50559 if (LABEL_KIND (x) == LABEL_NORMAL)
50561 (imm_values->imm)++;
50562 (imm_values->imm32)++;
50571 /* Return total size of immediate operands of an instruction along with number
50572 of corresponding immediate-operands. It initializes its parameters to zero
50573 before calling FIND_CONSTANT.
50574 INSN is the input instruction. IMM is the total of immediates.
50575 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
      bit immediates.  */
50579 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
50581 imm_info imm_values = {0, 0, 0};
50583 find_constant (insn, &imm_values);
50584 *imm = imm_values.imm;
50585 *imm32 = imm_values.imm32;
50586 *imm64 = imm_values.imm64;
/* Total immediate bytes: 4 per 32-bit, 8 per 64-bit immediate.  */
50587 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
50590 /* This function indicates if an operand of an instruction is an
      immediate (non-zero total immediate size/count).  */
50594 has_immediate (rtx insn)
50596 int num_imm_operand;
50597 int num_imm32_operand;
50598 int num_imm64_operand;
50601 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50602 &num_imm64_operand);
50606 /* Return single or double path for instructions, based on the
      amdfam10_decode attribute.  */
50608 static enum insn_path
50609 get_insn_path (rtx_insn *insn)
50611 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
50613 if ((int)path == 0)
50614 return path_single;
50616 if ((int)path == 1)
50617 return path_double;
50622 /* Return insn dispatch group: first by memory behavior, then branch,
      immediate and prefetch classification.  */
50624 static enum dispatch_group
50625 get_insn_group (rtx_insn *insn)
50627 enum dispatch_group group = get_mem_group (insn);
50631 if (is_branch (insn))
50632 return disp_branch;
50637 if (has_immediate (insn))
50640 if (is_prefetch (insn))
50641 return disp_prefetch;
50643 return disp_no_group;
50646 /* Count number of GROUP restricted instructions in a dispatch
50647 window WINDOW_LIST, i.e. whether adding INSN would exceed the
      window's immediate / load / store limits.  NOTE(review): return
      statements are missing from this line-sampled excerpt.  */
50650 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
50652 enum dispatch_group group = get_insn_group (insn);
50654 int num_imm_operand;
50655 int num_imm32_operand;
50656 int num_imm64_operand;
50658 if (group == disp_no_group)
50661 if (group == disp_imm)
50663 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50664 &num_imm64_operand);
/* Immediate-budget checks: total bytes, total count, and the
   32-bit/64-bit cross limits (a 64-bit immediate consumes two 32-bit
   slots).  */
50665 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
50666 || num_imm_operand + window_list->num_imm > MAX_IMM
50667 || (num_imm32_operand > 0
50668 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
50669 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
50670 || (num_imm64_operand > 0
50671 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
50672 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
50673 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
50674 && num_imm64_operand > 0
50675 && ((window_list->num_imm_64 > 0
50676 && window_list->num_insn >= 2)
50677 || window_list->num_insn >= 3)))
/* Memory-budget checks for load / store / load-store / prefetch.  */
50683 if ((group == disp_load_store
50684 && (window_list->num_loads >= MAX_LOAD
50685 || window_list->num_stores >= MAX_STORE))
50686 || ((group == disp_load
50687 || group == disp_prefetch)
50688 && window_list->num_loads >= MAX_LOAD)
50689 || (group == disp_store
50690 && window_list->num_stores >= MAX_STORE))
50696 /* This function returns true if insn satisfies dispatch rules on the
50697 last window scheduled.  NOTE(review): several return statements are
      missing from this line-sampled excerpt.  */
50700 fits_dispatch_window (rtx_insn *insn)
50702 dispatch_windows *window_list = dispatch_window_list;
50703 dispatch_windows *window_list_next = dispatch_window_list->next;
50704 unsigned int num_restrict;
50705 enum dispatch_group group = get_insn_group (insn);
50706 enum insn_path path = get_insn_path (insn);
50709 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
50710 instructions should be given the lowest priority in the
50711 scheduling process in Haifa scheduler to make sure they will be
50712 scheduled in the same dispatch window as the reference to them. */
50713 if (group == disp_jcc || group == disp_cmp)
50716 /* Check nonrestricted. */
50717 if (group == disp_no_group || group == disp_branch)
50720 /* Get last dispatch window. */
50721 if (window_list_next)
50722 window_list = window_list_next;
50724 if (window_list->window_num == 1)
/* Total bytes across both windows must stay under 48.  */
50726 sum = window_list->prev->window_size + window_list->window_size;
50729 || (min_insn_size (insn) + sum) >= 48)
50730 /* Window 1 is full. Go for next window. */
50734 num_restrict = count_num_restricted (insn, window_list);
50736 if (num_restrict > num_allowable_groups[group])
50739 /* See if it fits in the first window. */
50740 if (window_list->window_num == 0)
50742 /* The first window should have only single and double path
50744 if (path == path_double
50745 && (window_list->num_uops + 2) > MAX_INSN)
50747 else if (path != path_single)
50753 /* Add an instruction INSN with NUM_UOPS micro-operations to the
50754 dispatch window WINDOW_LIST, updating all window counters.  Marks
      the window as violated when INSN does not fit the dispatch rules.  */
50757 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
50759 int byte_len = min_insn_size (insn);
50760 int num_insn = window_list->num_insn;
50762 sched_insn_info *window = window_list->window;
50763 enum dispatch_group group = get_insn_group (insn);
50764 enum insn_path path = get_insn_path (insn);
50765 int num_imm_operand;
50766 int num_imm32_operand;
50767 int num_imm64_operand;
/* disp_cmp insns are deliberately exempt from violation marking (they
   are scheduled late by design; see fits_dispatch_window).  */
50769 if (!window_list->violation && group != disp_cmp
50770 && !fits_dispatch_window (insn))
50771 window_list->violation = true;
50773 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50774 &num_imm64_operand);
50776 /* Initialize window with new instruction. */
50777 window[num_insn].insn = insn;
50778 window[num_insn].byte_len = byte_len;
50779 window[num_insn].group = group;
50780 window[num_insn].path = path;
50781 window[num_insn].imm_bytes = imm_size;
50783 window_list->window_size += byte_len;
50784 window_list->num_insn = num_insn + 1;
50785 window_list->num_uops = window_list->num_uops + num_uops;
50786 window_list->imm_size += imm_size;
50787 window_list->num_imm += num_imm_operand;
50788 window_list->num_imm_32 += num_imm32_operand;
50789 window_list->num_imm_64 += num_imm64_operand;
/* Update the load/store totals; load-store insns count as both.  */
50791 if (group == disp_store)
50792 window_list->num_stores += 1;
50793 else if (group == disp_load
50794 || group == disp_prefetch)
50795 window_list->num_loads += 1;
50796 else if (group == disp_load_store)
50798 window_list->num_stores += 1;
50799 window_list->num_loads += 1;
50803 /* Adds a scheduled instruction, INSN, to the current dispatch window.
50804 If the total bytes of instructions or the number of instructions in
50805 the window exceed allowable, it allocates a new window. */
50808 add_to_dispatch_window (rtx_insn *insn)
50811 dispatch_windows *window_list;
50812 dispatch_windows *next_list;
50813 dispatch_windows *window0_list;
50814 enum insn_path path;
50815 enum dispatch_group insn_group;
/* Pseudo-insns with no insn code carry nothing to dispatch.  */
50823 if (INSN_CODE (insn) < 0)
50826 byte_len = min_insn_size (insn);
50827 window_list = dispatch_window_list;
50828 next_list = window_list->next;
50829 path = get_insn_path (insn);
50830 insn_group = get_insn_group (insn);
50832 /* Get the last dispatch window. */
50834 window_list = dispatch_window_list->next;
/* Uop count is derived from the decode path; for paths beyond single
   and double the enum value itself is the uop count.  */
50836 if (path == path_single)
50838 else if (path == path_double)
50841 insn_num_uops = (int) path;
50843 /* If current window is full, get a new window.
50844 Window number zero is full, if MAX_INSN uops are scheduled in it.
50845 Window number one is full, if window zero's bytes plus window
50846 one's bytes is 32, or if the bytes of the new instruction added
50847 to the total makes it greater than 48, or it has already MAX_INSN
50848 instructions in it. */
50849 num_insn = window_list->num_insn;
50850 num_uops = window_list->num_uops;
50851 window_num = window_list->window_num;
50852 insn_fits = fits_dispatch_window (insn);
50854 if (num_insn >= MAX_INSN
50855 || num_uops + insn_num_uops > MAX_INSN
/* Toggle between window 0 and window 1 (~n & 1 flips the low bit).  */
50858 window_num = ~window_num & 1;
50859 window_list = allocate_next_window (window_num);
50862 if (window_num == 0)
50864 add_insn_window (insn, window_list, insn_num_uops);
/* A branch that fills window 0 ends the dispatch group immediately.  */
50865 if (window_list->num_insn >= MAX_INSN
50866 && insn_group == disp_branch)
50868 process_end_window ();
50872 else if (window_num == 1)
50874 window0_list = window_list->prev;
50875 sum = window0_list->window_size + window_list->window_size;
/* Both windows combined may not exceed the 48-byte budget (see the
   comment block above).  */
50877 || (byte_len + sum) >= 48)
50879 process_end_window ();
50880 window_list = dispatch_window_list;
50883 add_insn_window (insn, window_list, insn_num_uops);
/* Only window numbers 0 and 1 exist in this model.  */
50886 gcc_unreachable ();
50888 if (is_end_basic_block (insn_group))
50890 /* End of basic block is reached do end-basic-block process. */
50891 process_end_window ();
50896 /* Print the dispatch window, WINDOW_NUM, to FILE. */
50898 DEBUG_FUNCTION static void
50899 debug_dispatch_window_file (FILE *file, int window_num)
50901 dispatch_windows *list;
/* WINDOW_NUM selects which of the two global window lists to dump.  */
50904 if (window_num == 0)
50905 list = dispatch_window_list;
50907 list = dispatch_window_list1;
50909 fprintf (file, "Window #%d:\n", list->window_num);
50910 fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
50911 list->num_insn, list->num_uops, list->window_size);
50912 fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
50913 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
50915 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
50917 fprintf (file, " insn info:\n");
/* Dump each occupied slot; a null insn marks the first unused slot.  */
50919 for (i = 0; i < MAX_INSN; i++)
50921 if (!list->window[i].insn)
50923 fprintf (file, "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
50924 i, group_name[list->window[i].group],
50925 i, (void *)list->window[i].insn,
50926 i, list->window[i].path,
50927 i, list->window[i].byte_len,
50928 i, list->window[i].imm_bytes);
50932 /* Print to stdout a dispatch window.  WINDOW_NUM selects window 0 or 1;
   this is a thin stdout wrapper around debug_dispatch_window_file.  */
50934 DEBUG_FUNCTION void
50935 debug_dispatch_window (int window_num)
50937 debug_dispatch_window_file (stdout, window_num);
50940 /* Print INSN dispatch information to FILE. */
50942 DEBUG_FUNCTION static void
50943 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
50946 enum insn_path path;
50947 enum dispatch_group group;
50949 int num_imm_operand;
50950 int num_imm32_operand;
50951 int num_imm64_operand;
/* Skip pseudo-insns that have no recognized insn code.  */
50953 if (INSN_CODE (insn) < 0)
50956 byte_len = min_insn_size (insn);
50957 path = get_insn_path (insn);
50958 group = get_insn_group (insn);
50959 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50960 &num_imm64_operand);
50962 fprintf (file, " insn info:\n");
50963 fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
50964 group_name[group], path, byte_len);
50965 fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
50966 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
50969 /* Print to STDERR the status of the ready list with respect to
50970 dispatch windows. */
50972 DEBUG_FUNCTION void
50973 debug_ready_dispatch (void)
50976 int no_ready = number_in_ready ();
/* NOTE(review): despite the header comment saying STDERR, the visible
   code writes to stdout.  */
50978 fprintf (stdout, "Number of ready: %d\n", no_ready);
50980 for (i = 0; i < no_ready; i++)
50981 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
50984 /* This routine is the driver of the dispatch scheduler.  MODE selects
   the action: DISPATCH_INIT resets the scheduler state, while
   ADD_TO_DISPATCH_WINDOW accounts INSN in the current window.  */
50987 do_dispatch (rtx_insn *insn, int mode)
50989 if (mode == DISPATCH_INIT)
50990 init_dispatch_sched ();
50991 else if (mode == ADD_TO_DISPATCH_WINDOW)
50992 add_to_dispatch_window (insn);
50995 /* Return TRUE if Dispatch Scheduling is supported. */
50998 has_dispatch (rtx_insn *insn, int action)
/* Dispatch scheduling is only active on AMD Bulldozer-family targets
   (bdver1-4) and only when -mdispatch-scheduler is enabled.  */
51000 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
51001 && flag_dispatch_scheduler)
51007 case IS_DISPATCH_ON:
51012 return is_cmp (insn);
51014 case DISPATCH_VIOLATION:
51015 return dispatch_violation ();
51017 case FITS_DISPATCH_WINDOW:
51018 return fits_dispatch_window (insn);
51024 /* Implementation of reassociation_width target hook used by
51025 reassoc phase to identify parallelism level in reassociated
51026 tree. Statements tree_code is passed in OPC. Arguments type
51029 Currently parallel reassociation is enabled for Atom
51030 processors only and we set reassociation width to be 2
51031 because Atom may issue up to 2 instructions per cycle.
51033 Return value should be fixed if parallel reassociation is
51034 enabled for other processors. */
51037 ix86_reassociation_width (unsigned int, machine_mode mode)
/* Vector modes get their own tuning knob; scalar integer and float
   modes are gated by separate per-CPU tuning flags below.  */
51040 if (VECTOR_MODE_P (mode))
51042 if (TARGET_VECTOR_PARALLEL_EXECUTION)
51049 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
51051 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
51057 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
51058 place emms and femms instructions. */
/* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE: map a scalar MODE to
   the widest vector mode the enabled ISA supports, preferring 512-bit
   AVX-512, then 256-bit AVX (unless -mprefer-avx128), then 128-bit.  */
51060 static machine_mode
51061 ix86_preferred_simd_mode (machine_mode mode)
/* Byte and word elements need AVX512BW for 512-bit vectors; dword and
   qword only need AVX512F.  */
51069 return TARGET_AVX512BW ? V64QImode :
51070 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
51072 return TARGET_AVX512BW ? V32HImode :
51073 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
51075 return TARGET_AVX512F ? V16SImode :
51076 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
51078 return TARGET_AVX512F ? V8DImode :
51079 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
51082 if (TARGET_AVX512F)
51084 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
/* Double vectors additionally require -mvectorize-double tuning.  */
51090 if (!TARGET_VECTORIZE_DOUBLE)
51092 else if (TARGET_AVX512F)
51094 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51096 else if (TARGET_SSE2)
51105 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
51106 vectors. If AVX512F is enabled then try vectorizing with 512bit,
51107 256bit and 128bit vectors. */
/* The return value is a bitmask of candidate vector sizes in bytes;
   0 means "only the preferred SIMD mode".  */
51109 static unsigned int
51110 ix86_autovectorize_vector_sizes (void)
51112 return TARGET_AVX512F ? 64 | 32 | 16 :
51113 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
51118 /* Return class of registers which could be used for pseudo of MODE
51119 and of class RCLASS for spilling instead of memory. Return NO_REGS
51120 if it is not possible or non-profitable. */
51122 ix86_spill_class (reg_class_t rclass, machine_mode mode)
/* Allow spilling SImode (and DImode on 64-bit) general registers into
   SSE registers instead of memory, when the tuning flag asks for it and
   MMX is off (MMX shares state with x87 and complicates this).  */
51124 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
51125 && (mode == SImode || (TARGET_64BIT && mode == DImode))
51126 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
51127 return ALL_SSE_REGS;
51131 /* Implement targetm.vectorize.init_cost.  Allocate a 3-element cost
   array (prologue / body / epilogue) initialized to zero; it is the
   opaque cost data passed to the other vectorizer cost hooks.  */
51134 ix86_init_cost (struct loop *)
51136 unsigned *cost = XNEWVEC (unsigned, 3);
51137 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
51141 /* Implement targetm.vectorize.add_stmt_cost. */
51144 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
51145 struct _stmt_vec_info *stmt_info, int misalign,
51146 enum vect_cost_model_location where)
51148 unsigned *cost = (unsigned *) data;
51149 unsigned retval = 0;
51151 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
51152 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
51154 /* Statements in an inner loop relative to the loop being
51155 vectorized are weighted more heavily. The value here is
51156 arbitrary and could potentially be improved with analysis. */
51157 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
51158 count *= 50; /* FIXME. */
51160 retval = (unsigned) (count * stmt_cost);
51162 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
51163 for Silvermont as it has out of order integer pipeline and can execute
51164 2 scalar instruction per tick, but has in order SIMD pipeline. */
51165 if (TARGET_SILVERMONT || TARGET_INTEL)
51166 if (stmt_info && stmt_info->stmt)
51168 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
/* The 1.7x factor is applied only to integer-typed results, in
   integer arithmetic as *17/10.  */
51169 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
51170 retval = (retval * 17) / 10;
/* Accumulate into the prologue/body/epilogue bucket chosen by WHERE.  */
51173 cost[where] += retval;
51178 /* Implement targetm.vectorize.finish_cost.  Copy the accumulated
   prologue/body/epilogue costs out of the opaque DATA array created by
   ix86_init_cost into the caller-provided locations.  */
51181 ix86_finish_cost (void *data, unsigned *prologue_cost,
51182 unsigned *body_cost, unsigned *epilogue_cost)
51184 unsigned *cost = (unsigned *) data;
51185 *prologue_cost = cost[vect_prologue];
51186 *body_cost = cost[vect_body];
51187 *epilogue_cost = cost[vect_epilogue];
51190 /* Implement targetm.vectorize.destroy_cost_data.  Releases the cost
   array allocated by ix86_init_cost (body elided in this excerpt).  */
51193 ix86_destroy_cost_data (void *data)
51198 /* Validate target specific memory model bits in VAL. */
51200 static unsigned HOST_WIDE_INT
51201 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
51203 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
/* Reject unknown target bits and the contradictory combination of
   HLE_ACQUIRE with HLE_RELEASE; fall back to seq-cst with a warning.  */
51206 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
51208 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
51210 warning (OPT_Winvalid_memory_model,
51211 "Unknown architecture specific memory model");
51212 return MEMMODEL_SEQ_CST;
/* HLE prefixes only make sense with a sufficiently strong base model:
   ACQUIRE (or stronger) for XACQUIRE, RELEASE (or stronger) for
   XRELEASE.  */
51214 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
51215 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
51217 warning (OPT_Winvalid_memory_model,
51218 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
51219 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
51221 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
51223 warning (OPT_Winvalid_memory_model,
51224 "HLE_RELEASE not used with RELEASE or stronger memory model");
51225 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
51230 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
51231 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
51232 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
51233 or number of vecsize_mangle variants that should be emitted. */
51236 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
51237 struct cgraph_simd_clone *clonei,
51238 tree base_type, int num)
/* An explicit simdlen must be a power of two in [2, 16].  */
51242 if (clonei->simdlen
51243 && (clonei->simdlen < 2
51244 || clonei->simdlen > 16
51245 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
51247 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51248 "unsupported simdlen %d", clonei->simdlen);
/* Only return/argument modes the x86 SIMD ABI can pass in vectors are
   accepted; anything else is warned about and rejected.  */
51252 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
51253 if (TREE_CODE (ret_type) != VOID_TYPE)
51254 switch (TYPE_MODE (ret_type))
51266 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51267 "unsupported return type %qT for simd\n", ret_type);
51274 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
51275 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
51276 switch (TYPE_MODE (TREE_TYPE (t)))
51288 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51289 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
/* Choose the ISA mangling letter ('b' = SSE-class, 'c' = AVX,
   'd' = AVX2 per the x86 vector ABI): cilk elemental defaults to 'b';
   a non-exported function gets the single best ISA for the current
   target; otherwise one clone per letter in "bcd" is emitted (NUM is
   the variant index).  */
51293 if (clonei->cilk_elemental)
51295 /* Parse here processor clause. If not present, default to 'b'. */
51296 clonei->vecsize_mangle = 'b';
51298 else if (!TREE_PUBLIC (node->decl))
51300 /* If the function isn't exported, we can pick up just one ISA
51303 clonei->vecsize_mangle = 'd';
51304 else if (TARGET_AVX)
51305 clonei->vecsize_mangle = 'c';
51307 clonei->vecsize_mangle = 'b';
51312 clonei->vecsize_mangle = "bcd"[num];
51315 switch (clonei->vecsize_mangle)
51318 clonei->vecsize_int = 128;
51319 clonei->vecsize_float = 128;
51322 clonei->vecsize_int = 128;
51323 clonei->vecsize_float = 256;
51326 clonei->vecsize_int = 256;
51327 clonei->vecsize_float = 256;
/* Derive the default simdlen from the vector width divided by the
   element width of BASE_TYPE, clamped to 16.  */
51330 if (clonei->simdlen == 0)
51332 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
51333 clonei->simdlen = clonei->vecsize_int;
51335 clonei->simdlen = clonei->vecsize_float;
51336 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
51337 if (clonei->simdlen > 16)
51338 clonei->simdlen = 16;
51343 /* Add target attribute to SIMD clone NODE if needed. */
51346 ix86_simd_clone_adjust (struct cgraph_node *node)
51348 const char *str = NULL;
51349 gcc_assert (node->decl == cfun->decl);
/* Pick a target attribute string matching the clone's ISA mangle
   letter (cases elided in this excerpt).  */
51350 switch (node->simdclone->vecsize_mangle)
51365 gcc_unreachable ();
/* Apply the attribute and re-materialize the target state for the
   current function so code generation uses the clone's ISA.  */
51370 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
51371 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
51374 ix86_reset_previous_fndecl ();
51375 ix86_set_current_function (node->decl);
51378 /* If SIMD clone NODE can't be used in a vectorized loop
51379 in current function, return -1, otherwise return a badness of using it
51380 (0 if it is most desirable from vecsize_mangle point of view, 1
51381 slightly less desirable, etc.). */
51384 ix86_simd_clone_usable (struct cgraph_node *node)
/* Badness depends on how well the clone's ISA letter matches the
   ISA of the current function: with AVX2 available the wider-ISA
   clones are preferred, so narrower ones are ranked worse.  */
51386 switch (node->simdclone->vecsize_mangle)
51393 return TARGET_AVX2 ? 2 : 1;
51397 return TARGET_AVX2 ? 1 : 0;
51404 gcc_unreachable ();
51408 /* This function adjusts the unroll factor based on
51409 the hardware capabilities. For ex, bdver3 has
51410 a loop buffer which makes unrolling of smaller
51411 loops less important. This function decides the
51412 unroll factor using number of memory references
51413 (value 32 is used) as a heuristic. */
51416 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
51421 unsigned mem_count = 0;
/* Without the per-CPU tuning flag, leave NUNROLL untouched.  */
51423 if (!TARGET_ADJUST_UNROLL)
51426 /* Count the number of memory references within the loop body.
51427 This value determines the unrolling factor for bdver3 and bdver4
/* Walk every sub-rtx of every real insn in every block of the loop,
   counting memory references.  */
51429 subrtx_iterator::array_type array;
51430 bbs = get_loop_body (loop);
51431 for (i = 0; i < loop->num_nodes; i++)
51432 FOR_BB_INSNS (bbs[i], insn)
51433 if (NONDEBUG_INSN_P (insn))
51434 FOR_EACH_SUBRTX (iter, array, insn, NONCONST)
51435 if (const_rtx x = *iter)
51438 machine_mode mode = GET_MODE (x);
51439 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
/* Cap the unroll factor so that roughly 32 memory references fit in
   the unrolled body (32 / mem_count).  */
51447 if (mem_count && mem_count <=32)
51448 return 32/mem_count;
51454 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
51457 ix86_float_exceptions_rounding_supported_p (void)
51459 /* For x87 floating point with standard excess precision handling,
51460 there is no adddf3 pattern (since x87 floating point only has
51461 XFmode operations) so the default hook implementation gets this
/* Exceptions/rounding are supported whenever either FP unit is in
   use: x87 (TARGET_80387) or SSE math.  */
51463 return TARGET_80387 || TARGET_SSE_MATH;
51466 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.  Build the three tree
   expressions the generic code needs for an atomic compound assignment
   with correct FP-environment semantics: *HOLD saves and clears the FP
   state, *CLEAR re-clears exceptions, and *UPDATE restores the saved
   state and re-raises any exceptions that occurred.  Handles the x87
   and SSE (MXCSR) environments separately.  */
51469 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
/* Nothing to do when neither FP unit is in use.  */
51471 if (!TARGET_80387 && !TARGET_SSE_MATH)
51473 tree exceptions_var = create_tmp_var (integer_type_node);
/* --- x87 part: save the 28-byte environment with fnstenv, clear
   exceptions with fnclex, read the status word with fnstsw, and
   restore with fldenv.  */
51476 tree fenv_index_type = build_index_type (size_int (6));
51477 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
51478 tree fenv_var = create_tmp_var (fenv_type);
51479 mark_addressable (fenv_var);
51480 tree fenv_ptr = build_pointer_type (fenv_type);
51481 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
51482 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
51483 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
51484 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
51485 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
51486 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
51487 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
51488 tree hold_fnclex = build_call_expr (fnclex, 0);
51489 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
51491 *clear = build_call_expr (fnclex, 0);
/* Capture the x87 status word (which holds the exception flags) into
   exceptions_var before restoring the environment.  */
51492 tree sw_var = create_tmp_var (short_unsigned_type_node);
51493 tree fnstsw_call = build_call_expr (fnstsw, 0);
51494 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
51495 sw_var, fnstsw_call);
51496 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
51497 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
51498 exceptions_var, exceptions_x87);
51499 *update = build2 (COMPOUND_EXPR, integer_type_node,
51500 sw_mod, update_mod);
51501 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
51502 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
/* --- SSE part: save MXCSR with stmxcsr, mask all exceptions
   (set bits 0x1f80) and clear the sticky flags (mask 0xffffffc0),
   reload the modified MXCSR, and later merge its exception flags and
   restore the original value with ldmxcsr.  */
51504 if (TARGET_SSE_MATH)
51506 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node);
51507 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node);
51508 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
51509 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
51510 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
51511 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
51512 mxcsr_orig_var, stmxcsr_hold_call);
51513 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
51515 build_int_cst (unsigned_type_node, 0x1f80));
51516 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
51517 build_int_cst (unsigned_type_node, 0xffffffc0));
51518 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
51519 mxcsr_mod_var, hold_mod_val);
51520 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51521 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
51522 hold_assign_orig, hold_assign_mod);
51523 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
51524 ldmxcsr_hold_call);
/* Chain onto the x87 *hold when both units are active.  */
51526 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
51529 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51531 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
51532 ldmxcsr_clear_call);
51534 *clear = ldmxcsr_clear_call;
51535 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
51536 tree exceptions_sse = fold_convert (integer_type_node,
51537 stxmcsr_update_call);
/* OR the SSE exception flags into exceptions_var (or assign them
   directly when x87 is not in use).  */
51540 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
51541 exceptions_var, exceptions_sse);
51542 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
51543 exceptions_var, exceptions_mod);
51544 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
51545 exceptions_assign);
51548 *update = build2 (MODIFY_EXPR, integer_type_node,
51549 exceptions_var, exceptions_sse);
51550 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
51551 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51552 ldmxcsr_update_call);
/* Finally re-raise the recorded exceptions atomically.  */
51554 tree atomic_feraiseexcept
51555 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
51556 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
51557 1, exceptions_var);
51558 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51559 atomic_feraiseexcept_call);
51562 /* Return mode to be used for bounds or VOIDmode
51563 if bounds are not supported. */
51565 static enum machine_mode
51566 ix86_mpx_bound_mode ()
51568 /* Do not support pointer checker if MPX
/* Warn once if -fcheck-pointer-bounds is on but MPX is unavailable;
   the user must pass -mmpx to get bound registers.  */
51572 if (flag_check_pointer_bounds)
51573 warning (0, "Pointer Checker requires MPX support on this target."
51574 " Use -mmpx options to enable MPX.");
51581 /* Return constant used to statically initialize constant bounds.
51583 This function is used to create special bound values. For now
51584 only INIT bounds and NONE bounds are expected. More special
51585 values may be added later. */
51588 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
/* INIT bounds (lb=0, ub=-1) become {-1, 0} and NONE bounds (lb=-1,
   ub=0) become {0, -1}: the low/high parts are encoded inverted
   relative to the arguments.  */
51590 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
51591 : build_zero_cst (pointer_sized_int_node);
51592 tree high = ub ? build_zero_cst (pointer_sized_int_node)
51593 : build_minus_one_cst (pointer_sized_int_node);
51595 /* This function is supposed to be used to create INIT and
51596 NONE bounds only. */
51597 gcc_assert ((lb == 0 && ub == -1)
51598 || (lb == -1 && ub == 0));
/* Bounds are represented as a complex constant of two
   pointer-sized integers.  */
51600 return build_complex (NULL, low, high);
51603 /* Generate a list of statements STMTS to initialize pointer bounds
51604 variable VAR with bounds LB and UB. Return the number of generated
51608 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
51610 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
51611 tree lhs, modify, var_p;
/* The upper bound is stored in one's-complement form, matching the
   MPX bound-register representation.  */
51613 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
51614 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
/* Store LB into the first pointer-sized slot of VAR ...  */
51616 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
51617 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
51618 append_to_statement_list (modify, stmts);
/* ... and the complemented UB into the second slot, one pointer-size
   past the first.  */
51620 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
51621 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
51622 TYPE_SIZE_UNIT (pointer_sized_int_node)));
51623 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
51624 append_to_statement_list (modify, stmts);
51629 /* Initialize the GCC target structure. */
51630 #undef TARGET_RETURN_IN_MEMORY
51631 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
51633 #undef TARGET_LEGITIMIZE_ADDRESS
51634 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
51636 #undef TARGET_ATTRIBUTE_TABLE
51637 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
51638 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
51639 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
51640 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51641 # undef TARGET_MERGE_DECL_ATTRIBUTES
51642 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
51645 #undef TARGET_COMP_TYPE_ATTRIBUTES
51646 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
51648 #undef TARGET_INIT_BUILTINS
51649 #define TARGET_INIT_BUILTINS ix86_init_builtins
51650 #undef TARGET_BUILTIN_DECL
51651 #define TARGET_BUILTIN_DECL ix86_builtin_decl
51652 #undef TARGET_EXPAND_BUILTIN
51653 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
51655 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
51656 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
51657 ix86_builtin_vectorized_function
51659 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
51660 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
51662 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
51663 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
51665 #undef TARGET_VECTORIZE_BUILTIN_GATHER
51666 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
51668 #undef TARGET_BUILTIN_RECIPROCAL
51669 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
51671 #undef TARGET_ASM_FUNCTION_EPILOGUE
51672 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
51674 #undef TARGET_ENCODE_SECTION_INFO
51675 #ifndef SUBTARGET_ENCODE_SECTION_INFO
51676 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
51678 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
51681 #undef TARGET_ASM_OPEN_PAREN
51682 #define TARGET_ASM_OPEN_PAREN ""
51683 #undef TARGET_ASM_CLOSE_PAREN
51684 #define TARGET_ASM_CLOSE_PAREN ""
51686 #undef TARGET_ASM_BYTE_OP
51687 #define TARGET_ASM_BYTE_OP ASM_BYTE
51689 #undef TARGET_ASM_ALIGNED_HI_OP
51690 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
51691 #undef TARGET_ASM_ALIGNED_SI_OP
51692 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
51694 #undef TARGET_ASM_ALIGNED_DI_OP
51695 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
51698 #undef TARGET_PROFILE_BEFORE_PROLOGUE
51699 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
51701 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
51702 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
51704 #undef TARGET_ASM_UNALIGNED_HI_OP
51705 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
51706 #undef TARGET_ASM_UNALIGNED_SI_OP
51707 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
51708 #undef TARGET_ASM_UNALIGNED_DI_OP
51709 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
51711 #undef TARGET_PRINT_OPERAND
51712 #define TARGET_PRINT_OPERAND ix86_print_operand
51713 #undef TARGET_PRINT_OPERAND_ADDRESS
51714 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
51715 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
51716 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
51717 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
51718 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
51720 #undef TARGET_SCHED_INIT_GLOBAL
51721 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
51722 #undef TARGET_SCHED_ADJUST_COST
51723 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
51724 #undef TARGET_SCHED_ISSUE_RATE
51725 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
51726 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
51727 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
51728 ia32_multipass_dfa_lookahead
51729 #undef TARGET_SCHED_MACRO_FUSION_P
51730 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
51731 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
51732 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
51734 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
51735 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
51737 #undef TARGET_MEMMODEL_CHECK
51738 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
51740 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
51741 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
51744 #undef TARGET_HAVE_TLS
51745 #define TARGET_HAVE_TLS true
51747 #undef TARGET_CANNOT_FORCE_CONST_MEM
51748 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
51749 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
51750 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
51752 #undef TARGET_DELEGITIMIZE_ADDRESS
51753 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
51755 #undef TARGET_MS_BITFIELD_LAYOUT_P
51756 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
51759 #undef TARGET_BINDS_LOCAL_P
51760 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
51762 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51763 #undef TARGET_BINDS_LOCAL_P
51764 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
51767 #undef TARGET_ASM_OUTPUT_MI_THUNK
51768 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
51769 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
51770 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
51772 #undef TARGET_ASM_FILE_START
51773 #define TARGET_ASM_FILE_START x86_file_start
51775 #undef TARGET_OPTION_OVERRIDE
51776 #define TARGET_OPTION_OVERRIDE ix86_option_override
51778 #undef TARGET_REGISTER_MOVE_COST
51779 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
51780 #undef TARGET_MEMORY_MOVE_COST
51781 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
51782 #undef TARGET_RTX_COSTS
51783 #define TARGET_RTX_COSTS ix86_rtx_costs
51784 #undef TARGET_ADDRESS_COST
51785 #define TARGET_ADDRESS_COST ix86_address_cost
51787 #undef TARGET_FIXED_CONDITION_CODE_REGS
51788 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
51789 #undef TARGET_CC_MODES_COMPATIBLE
51790 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
51792 #undef TARGET_MACHINE_DEPENDENT_REORG
51793 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
51795 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
51796 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
51798 #undef TARGET_BUILD_BUILTIN_VA_LIST
51799 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
51801 #undef TARGET_FOLD_BUILTIN
51802 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
51804 #undef TARGET_COMPARE_VERSION_PRIORITY
51805 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
51807 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
51808 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
51809 ix86_generate_version_dispatcher_body
51811 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
51812 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
51813 ix86_get_function_versions_dispatcher
51815 #undef TARGET_ENUM_VA_LIST_P
51816 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
51818 #undef TARGET_FN_ABI_VA_LIST
51819 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
51821 #undef TARGET_CANONICAL_VA_LIST_TYPE
51822 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
/* Install the IA-32 implementations of the target hooks.  Each
   "#undef / #define TARGET_*" pair below overrides the generic default
   from target-def.h before TARGET_INITIALIZER is expanded into the
   `targetm' structure at the bottom of this file.
   NOTE(review): the numeric prefixes on each line are an artifact of
   this extracted view, and several guard lines (e.g. #endif's) fall on
   elided lines -- confirm against the full source.  */

/* Varargs, prototype promotion and argument-passing hooks.  */
51824 #undef TARGET_EXPAND_BUILTIN_VA_START
51825 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
51827 #undef TARGET_MD_ASM_CLOBBERS
51828 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
51830 #undef TARGET_PROMOTE_PROTOTYPES
51831 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
51832 #undef TARGET_SETUP_INCOMING_VARARGS
51833 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
51834 #undef TARGET_MUST_PASS_IN_STACK
51835 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
51836 #undef TARGET_FUNCTION_ARG_ADVANCE
51837 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
51838 #undef TARGET_FUNCTION_ARG
51839 #define TARGET_FUNCTION_ARG ix86_function_arg
/* PIC register setup.  */
51840 #undef TARGET_INIT_PIC_REG
51841 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
51842 #undef TARGET_USE_PSEUDO_PIC_REG
51843 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
51844 #undef TARGET_FUNCTION_ARG_BOUNDARY
51845 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
51846 #undef TARGET_PASS_BY_REFERENCE
51847 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
/* Stack alignment and dynamic realignment (DRAP) support.  */
51848 #undef TARGET_INTERNAL_ARG_POINTER
51849 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
51850 #undef TARGET_UPDATE_STACK_BOUNDARY
51851 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
51852 #undef TARGET_GET_DRAP_RTX
51853 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
51854 #undef TARGET_STRICT_ARGUMENT_NAMING
51855 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
/* Nested-function trampolines and static chain.  */
51856 #undef TARGET_STATIC_CHAIN
51857 #define TARGET_STATIC_CHAIN ix86_static_chain
51858 #undef TARGET_TRAMPOLINE_INIT
51859 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
51860 #undef TARGET_RETURN_POPS_ARGS
51861 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
51863 #undef TARGET_LEGITIMATE_COMBINED_INSN
51864 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
51866 #undef TARGET_ASAN_SHADOW_OFFSET
51867 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
51869 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
51870 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
/* Mode-support predicates.  */
51872 #undef TARGET_SCALAR_MODE_SUPPORTED_P
51873 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
51875 #undef TARGET_VECTOR_MODE_SUPPORTED_P
51876 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
51878 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
51879 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
51880 ix86_libgcc_floating_mode_supported_p
51882 #undef TARGET_C_MODE_FOR_SUFFIX
51883 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
51886 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
51887 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
/* Allow subtargets (e.g. cygwin/mingw) to add their own attributes.
   NOTE(review): the matching #endif for this #ifdef lies on an elided
   line of this view -- confirm against the full file.  */
51890 #ifdef SUBTARGET_INSERT_ATTRIBUTES
51891 #undef TARGET_INSERT_ATTRIBUTES
51892 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
51895 #undef TARGET_MANGLE_TYPE
51896 #define TARGET_MANGLE_TYPE ix86_mangle_type
51899 #undef TARGET_STACK_PROTECT_FAIL
51900 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
/* Function return-value hooks.  */
51903 #undef TARGET_FUNCTION_VALUE
51904 #define TARGET_FUNCTION_VALUE ix86_function_value
51906 #undef TARGET_FUNCTION_VALUE_REGNO_P
51907 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
51909 #undef TARGET_PROMOTE_FUNCTION_MODE
51910 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
51912 #undef TARGET_MEMBER_TYPE_FORCES_BLK
51913 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
51915 #undef TARGET_INSTANTIATE_DECLS
51916 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
/* Register allocation and reload hooks.  */
51918 #undef TARGET_SECONDARY_RELOAD
51919 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
51921 #undef TARGET_CLASS_MAX_NREGS
51922 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
51924 #undef TARGET_PREFERRED_RELOAD_CLASS
51925 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
51926 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
51927 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
51928 #undef TARGET_CLASS_LIKELY_SPILLED_P
51929 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
/* Auto-vectorizer cost model and configuration hooks.  */
51931 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
51932 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
51933 ix86_builtin_vectorization_cost
51934 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
51935 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
51936 ix86_vectorize_vec_perm_const_ok
51937 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
51938 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
51939 ix86_preferred_simd_mode
51940 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
51941 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
51942 ix86_autovectorize_vector_sizes
51943 #undef TARGET_VECTORIZE_INIT_COST
51944 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
51945 #undef TARGET_VECTORIZE_ADD_STMT_COST
51946 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
51947 #undef TARGET_VECTORIZE_FINISH_COST
51948 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
51949 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
51950 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
/* Per-function target ("target" attribute / pragma) option hooks.  */
51952 #undef TARGET_SET_CURRENT_FUNCTION
51953 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
51955 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
51956 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
51958 #undef TARGET_OPTION_SAVE
51959 #define TARGET_OPTION_SAVE ix86_function_specific_save
51961 #undef TARGET_OPTION_RESTORE
51962 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
51964 #undef TARGET_OPTION_PRINT
51965 #define TARGET_OPTION_PRINT ix86_function_specific_print
51967 #undef TARGET_OPTION_FUNCTION_VERSIONS
51968 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
51970 #undef TARGET_CAN_INLINE_P
51971 #define TARGET_CAN_INLINE_P ix86_can_inline_p
51973 #undef TARGET_EXPAND_TO_RTL_HOOK
51974 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
/* Address and constant legitimacy; LRA-based register allocation.  */
51976 #undef TARGET_LEGITIMATE_ADDRESS_P
51977 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
51979 #undef TARGET_LRA_P
51980 #define TARGET_LRA_P hook_bool_void_true
51982 #undef TARGET_REGISTER_PRIORITY
51983 #define TARGET_REGISTER_PRIORITY ix86_register_priority
51985 #undef TARGET_REGISTER_USAGE_LEVELING_P
51986 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
51988 #undef TARGET_LEGITIMATE_CONSTANT_P
51989 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
/* Frame pointer and register-elimination hooks.  */
51991 #undef TARGET_FRAME_POINTER_REQUIRED
51992 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
51994 #undef TARGET_CAN_ELIMINATE
51995 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
51997 #undef TARGET_EXTRA_LIVE_ON_ENTRY
51998 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
52000 #undef TARGET_ASM_CODE_END
52001 #define TARGET_ASM_CODE_END ix86_code_end
52003 #undef TARGET_CONDITIONAL_REGISTER_USAGE
52004 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
/* NOTE(review): darwin_rename_builtins is Darwin-specific; upstream
   guards this pair with "#if TARGET_MACHO", whose #if/#endif lines are
   elided from this view -- confirm before editing.  */
52007 #undef TARGET_INIT_LIBFUNCS
52008 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
52011 #undef TARGET_LOOP_UNROLL_ADJUST
52012 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
52014 #undef TARGET_SPILL_CLASS
52015 #define TARGET_SPILL_CLASS ix86_spill_class
/* OpenMP/Cilk "simd" clone hooks.  */
52017 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
52018 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
52019 ix86_simd_clone_compute_vecsize_and_simdlen
52021 #undef TARGET_SIMD_CLONE_ADJUST
52022 #define TARGET_SIMD_CLONE_ADJUST \
52023 ix86_simd_clone_adjust
52025 #undef TARGET_SIMD_CLONE_USABLE
52026 #define TARGET_SIMD_CLONE_USABLE \
52027 ix86_simd_clone_usable
52029 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
52030 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
52031 ix86_float_exceptions_rounding_supported_p
/* Mode-switching (optimize_mode_switching) hooks.  */
52033 #undef TARGET_MODE_EMIT
52034 #define TARGET_MODE_EMIT ix86_emit_mode_set
52036 #undef TARGET_MODE_NEEDED
52037 #define TARGET_MODE_NEEDED ix86_mode_needed
52039 #undef TARGET_MODE_AFTER
52040 #define TARGET_MODE_AFTER ix86_mode_after
52042 #undef TARGET_MODE_ENTRY
52043 #define TARGET_MODE_ENTRY ix86_mode_entry
52045 #undef TARGET_MODE_EXIT
52046 #define TARGET_MODE_EXIT ix86_mode_exit
52048 #undef TARGET_MODE_PRIORITY
52049 #define TARGET_MODE_PRIORITY ix86_mode_priority
52051 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
52052 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
/* Pointer Bounds Checker (Intel MPX) hooks.  */
52054 #undef TARGET_LOAD_BOUNDS_FOR_ARG
52055 #define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
52057 #undef TARGET_STORE_BOUNDS_FOR_ARG
52058 #define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
52060 #undef TARGET_LOAD_RETURNED_BOUNDS
52061 #define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
52063 #undef TARGET_STORE_RETURNED_BOUNDS
52064 #define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
52066 #undef TARGET_CHKP_BOUND_MODE
52067 #define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
52069 #undef TARGET_BUILTIN_CHKP_FUNCTION
52070 #define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
52072 #undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
52073 #define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
52075 #undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
52076 #define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
52078 #undef TARGET_CHKP_INITIALIZE_BOUNDS
52079 #define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
52081 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
52082 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
52084 #undef TARGET_OFFLOAD_OPTIONS
52085 #define TARGET_OFFLOAD_OPTIONS \
52086 ix86_offload_options
/* The single global target-hook vector; TARGET_INITIALIZER picks up
   every TARGET_* override defined above.  */
52088 struct gcc_target targetm = TARGET_INITIALIZER;
/* Machine-generated garbage-collector roots for this file.  */
52090 #include "gt-i386.h"