1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2016 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
31 #include "stringpool.h"
38 #include "diagnostic.h"
41 #include "fold-const.h"
44 #include "stor-layout.h"
47 #include "insn-attr.h"
53 #include "common/common-target.h"
54 #include "langhooks.h"
58 #include "tm-constrs.h"
61 #include "sched-int.h"
63 #include "tree-pass.h"
65 #include "pass_manager.h"
66 #include "target-globals.h"
67 #include "gimple-iterator.h"
68 #include "tree-vectorizer.h"
69 #include "shrink-wrap.h"
72 #include "tree-iterator.h"
73 #include "tree-chkp.h"
76 #include "case-cfn-macros.h"
77 #include "regrename.h"
80 /* This file should be included last. */
81 #include "target-def.h"
83 static rtx legitimize_dllimport_symbol (rtx, bool);
84 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
85 static rtx legitimize_pe_coff_symbol (rtx, bool);
86 static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
/* Default stack-limit constant; -1 means no explicit limit unless the
   target configuration overrides it (NOTE(review): confirm semantics
   against the tm.h documentation).  Fix: restored the #endif that was
   dropped from this copy, and stripped fused listing line numbers.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   Fix: restored the final ": 4)" arm (the "other" bucket) that was
   dropped from this copy — the macro previously ended in a dangling
   line continuation — and stripped fused listing line numbers.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Placeholder stringop_algs entry for a table slot a tuning never uses:
   always fall back to a library call.  (Fix: stripped fused listing
   line numbers from this copy.)  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
106 static stringop_algs ix86_size_memcpy[2] = {
107 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
108 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
109 static stringop_algs ix86_size_memset[2] = {
110 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
111 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
/* NOTE(review): this copy has dropped source lines inside this positional
   initializer (at least the MOVE_RATIO entry after "large" insn, the
   branch-cost entry after the prefetch count, the ix86_size_memcpy /
   ix86_size_memset pointers after FSQRT, and the closing "};"), and the
   original listing's line numbers are fused into the text.  Restore from
   upstream GCC before building; do not re-flow fields by hand — the
   struct is initialized positionally.  */
114 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
115 COSTS_N_BYTES (2), /* cost of an add instruction */
116 COSTS_N_BYTES (3), /* cost of a lea instruction */
117 COSTS_N_BYTES (2), /* variable shift costs */
118 COSTS_N_BYTES (3), /* constant shift costs */
119 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
120 COSTS_N_BYTES (3), /* HI */
121 COSTS_N_BYTES (3), /* SI */
122 COSTS_N_BYTES (3), /* DI */
123 COSTS_N_BYTES (5)}, /* other */
124 0, /* cost of multiply per each bit set */
125 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
126 COSTS_N_BYTES (3), /* HI */
127 COSTS_N_BYTES (3), /* SI */
128 COSTS_N_BYTES (3), /* DI */
129 COSTS_N_BYTES (5)}, /* other */
130 COSTS_N_BYTES (3), /* cost of movsx */
131 COSTS_N_BYTES (3), /* cost of movzx */
132 0, /* "large" insn */
134 2, /* cost for loading QImode using movzbl */
135 {2, 2, 2}, /* cost of loading integer registers
136 in QImode, HImode and SImode.
137 Relative to reg-reg move (2). */
138 {2, 2, 2}, /* cost of storing integer registers */
139 2, /* cost of reg,reg fld/fst */
140 {2, 2, 2}, /* cost of loading fp registers
141 in SFmode, DFmode and XFmode */
142 {2, 2, 2}, /* cost of storing fp registers
143 in SFmode, DFmode and XFmode */
144 3, /* cost of moving MMX register */
145 {3, 3}, /* cost of loading MMX registers
146 in SImode and DImode */
147 {3, 3}, /* cost of storing MMX registers
148 in SImode and DImode */
149 3, /* cost of moving SSE register */
150 {3, 3, 3}, /* cost of loading SSE registers
151 in SImode, DImode and TImode */
152 {3, 3, 3}, /* cost of storing SSE registers
153 in SImode, DImode and TImode */
154 3, /* MMX or SSE register to integer */
155 0, /* size of l1 cache */
156 0, /* size of l2 cache */
157 0, /* size of prefetch block */
158 0, /* number of parallel prefetches */
160 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
161 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
162 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
163 COSTS_N_BYTES (2), /* cost of FABS instruction. */
164 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
165 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
168 1, /* scalar_stmt_cost. */
169 1, /* scalar load_cost. */
170 1, /* scalar_store_cost. */
171 1, /* vec_stmt_cost. */
172 1, /* vec_to_scalar_cost. */
173 1, /* scalar_to_vec_cost. */
174 1, /* vec_align_load_cost. */
175 1, /* vec_unalign_load_cost. */
176 1, /* vec_store_cost. */
177 1, /* cond_taken_branch_cost. */
178 1, /* cond_not_taken_branch_cost. */
181 /* Processor costs (relative to an add) */
182 static stringop_algs i386_memcpy[2] = {
183 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
184 DUMMY_STRINGOP_ALGS};
185 static stringop_algs i386_memset[2] = {
186 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
187 DUMMY_STRINGOP_ALGS};
/* NOTE(review): this copy has dropped source lines inside this positional
   initializer (at least the MOVE_RATIO entry, the branch-cost entry, the
   i386_memcpy / i386_memset pointers, and the closing "};"), and the
   original listing's line numbers are fused into the text.  Restore from
   upstream GCC before building.  */
190 struct processor_costs i386_cost = { /* 386 specific costs */
191 COSTS_N_INSNS (1), /* cost of an add instruction */
192 COSTS_N_INSNS (1), /* cost of a lea instruction */
193 COSTS_N_INSNS (3), /* variable shift costs */
194 COSTS_N_INSNS (2), /* constant shift costs */
195 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
196 COSTS_N_INSNS (6), /* HI */
197 COSTS_N_INSNS (6), /* SI */
198 COSTS_N_INSNS (6), /* DI */
199 COSTS_N_INSNS (6)}, /* other */
200 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
201 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
202 COSTS_N_INSNS (23), /* HI */
203 COSTS_N_INSNS (23), /* SI */
204 COSTS_N_INSNS (23), /* DI */
205 COSTS_N_INSNS (23)}, /* other */
206 COSTS_N_INSNS (3), /* cost of movsx */
207 COSTS_N_INSNS (2), /* cost of movzx */
208 15, /* "large" insn */
210 4, /* cost for loading QImode using movzbl */
211 {2, 4, 2}, /* cost of loading integer registers
212 in QImode, HImode and SImode.
213 Relative to reg-reg move (2). */
214 {2, 4, 2}, /* cost of storing integer registers */
215 2, /* cost of reg,reg fld/fst */
216 {8, 8, 8}, /* cost of loading fp registers
217 in SFmode, DFmode and XFmode */
218 {8, 8, 8}, /* cost of storing fp registers
219 in SFmode, DFmode and XFmode */
220 2, /* cost of moving MMX register */
221 {4, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {4, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of l1 cache */
232 0, /* size of l2 cache */
233 0, /* size of prefetch block */
234 0, /* number of parallel prefetches */
236 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
237 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
238 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
239 COSTS_N_INSNS (22), /* cost of FABS instruction. */
240 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
241 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
244 1, /* scalar_stmt_cost. */
245 1, /* scalar load_cost. */
246 1, /* scalar_store_cost. */
247 1, /* vec_stmt_cost. */
248 1, /* vec_to_scalar_cost. */
249 1, /* scalar_to_vec_cost. */
250 1, /* vec_align_load_cost. */
251 2, /* vec_unalign_load_cost. */
252 1, /* vec_store_cost. */
253 3, /* cond_taken_branch_cost. */
254 1, /* cond_not_taken_branch_cost. */
257 static stringop_algs i486_memcpy[2] = {
258 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
259 DUMMY_STRINGOP_ALGS};
260 static stringop_algs i486_memset[2] = {
261 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
262 DUMMY_STRINGOP_ALGS};
/* NOTE(review): this copy has dropped source lines inside this positional
   initializer (at least the MOVE_RATIO entry, the branch-cost entry, the
   i486_memcpy / i486_memset pointers, and the closing "};"), and the
   original listing's line numbers are fused into the text.  Restore from
   upstream GCC before building.  */
265 struct processor_costs i486_cost = { /* 486 specific costs */
266 COSTS_N_INSNS (1), /* cost of an add instruction */
267 COSTS_N_INSNS (1), /* cost of a lea instruction */
268 COSTS_N_INSNS (3), /* variable shift costs */
269 COSTS_N_INSNS (2), /* constant shift costs */
270 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
271 COSTS_N_INSNS (12), /* HI */
272 COSTS_N_INSNS (12), /* SI */
273 COSTS_N_INSNS (12), /* DI */
274 COSTS_N_INSNS (12)}, /* other */
275 1, /* cost of multiply per each bit set */
276 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
277 COSTS_N_INSNS (40), /* HI */
278 COSTS_N_INSNS (40), /* SI */
279 COSTS_N_INSNS (40), /* DI */
280 COSTS_N_INSNS (40)}, /* other */
281 COSTS_N_INSNS (3), /* cost of movsx */
282 COSTS_N_INSNS (2), /* cost of movzx */
283 15, /* "large" insn */
285 4, /* cost for loading QImode using movzbl */
286 {2, 4, 2}, /* cost of loading integer registers
287 in QImode, HImode and SImode.
288 Relative to reg-reg move (2). */
289 {2, 4, 2}, /* cost of storing integer registers */
290 2, /* cost of reg,reg fld/fst */
291 {8, 8, 8}, /* cost of loading fp registers
292 in SFmode, DFmode and XFmode */
293 {8, 8, 8}, /* cost of storing fp registers
294 in SFmode, DFmode and XFmode */
295 2, /* cost of moving MMX register */
296 {4, 8}, /* cost of loading MMX registers
297 in SImode and DImode */
298 {4, 8}, /* cost of storing MMX registers
299 in SImode and DImode */
300 2, /* cost of moving SSE register */
301 {4, 8, 16}, /* cost of loading SSE registers
302 in SImode, DImode and TImode */
303 {4, 8, 16}, /* cost of storing SSE registers
304 in SImode, DImode and TImode */
305 3, /* MMX or SSE register to integer */
306 4, /* size of l1 cache. 486 has 8kB cache
307 shared for code and data, so 4kB is
308 not really precise. */
309 4, /* size of l2 cache */
310 0, /* size of prefetch block */
311 0, /* number of parallel prefetches */
313 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
314 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
315 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
316 COSTS_N_INSNS (3), /* cost of FABS instruction. */
317 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
318 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
321 1, /* scalar_stmt_cost. */
322 1, /* scalar load_cost. */
323 1, /* scalar_store_cost. */
324 1, /* vec_stmt_cost. */
325 1, /* vec_to_scalar_cost. */
326 1, /* scalar_to_vec_cost. */
327 1, /* vec_align_load_cost. */
328 2, /* vec_unalign_load_cost. */
329 1, /* vec_store_cost. */
330 3, /* cond_taken_branch_cost. */
331 1, /* cond_not_taken_branch_cost. */
334 static stringop_algs pentium_memcpy[2] = {
335 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
336 DUMMY_STRINGOP_ALGS};
337 static stringop_algs pentium_memset[2] = {
338 {libcall, {{-1, rep_prefix_4_byte, false}}},
339 DUMMY_STRINGOP_ALGS};
/* NOTE(review): this copy has dropped source lines inside this positional
   initializer (at least the MOVE_RATIO entry, the branch-cost entry, the
   pentium_memcpy / pentium_memset pointers, and the closing "};"), and the
   original listing's line numbers are fused into the text.  Restore from
   upstream GCC before building.  */
342 struct processor_costs pentium_cost = {
343 COSTS_N_INSNS (1), /* cost of an add instruction */
344 COSTS_N_INSNS (1), /* cost of a lea instruction */
345 COSTS_N_INSNS (4), /* variable shift costs */
346 COSTS_N_INSNS (1), /* constant shift costs */
347 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
348 COSTS_N_INSNS (11), /* HI */
349 COSTS_N_INSNS (11), /* SI */
350 COSTS_N_INSNS (11), /* DI */
351 COSTS_N_INSNS (11)}, /* other */
352 0, /* cost of multiply per each bit set */
353 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
354 COSTS_N_INSNS (25), /* HI */
355 COSTS_N_INSNS (25), /* SI */
356 COSTS_N_INSNS (25), /* DI */
357 COSTS_N_INSNS (25)}, /* other */
358 COSTS_N_INSNS (3), /* cost of movsx */
359 COSTS_N_INSNS (2), /* cost of movzx */
360 8, /* "large" insn */
362 6, /* cost for loading QImode using movzbl */
363 {2, 4, 2}, /* cost of loading integer registers
364 in QImode, HImode and SImode.
365 Relative to reg-reg move (2). */
366 {2, 4, 2}, /* cost of storing integer registers */
367 2, /* cost of reg,reg fld/fst */
368 {2, 2, 6}, /* cost of loading fp registers
369 in SFmode, DFmode and XFmode */
370 {4, 4, 6}, /* cost of storing fp registers
371 in SFmode, DFmode and XFmode */
372 8, /* cost of moving MMX register */
373 {8, 8}, /* cost of loading MMX registers
374 in SImode and DImode */
375 {8, 8}, /* cost of storing MMX registers
376 in SImode and DImode */
377 2, /* cost of moving SSE register */
378 {4, 8, 16}, /* cost of loading SSE registers
379 in SImode, DImode and TImode */
380 {4, 8, 16}, /* cost of storing SSE registers
381 in SImode, DImode and TImode */
382 3, /* MMX or SSE register to integer */
383 8, /* size of l1 cache. */
384 8, /* size of l2 cache */
385 0, /* size of prefetch block */
386 0, /* number of parallel prefetches */
388 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
389 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
390 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
391 COSTS_N_INSNS (1), /* cost of FABS instruction. */
392 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
393 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
396 1, /* scalar_stmt_cost. */
397 1, /* scalar load_cost. */
398 1, /* scalar_store_cost. */
399 1, /* vec_stmt_cost. */
400 1, /* vec_to_scalar_cost. */
401 1, /* scalar_to_vec_cost. */
402 1, /* vec_align_load_cost. */
403 2, /* vec_unalign_load_cost. */
404 1, /* vec_store_cost. */
405 3, /* cond_taken_branch_cost. */
406 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): this copy has dropped source lines inside this positional
   initializer (at least the MOVE_RATIO entry, the branch-cost entry, the
   memcpy/memset algorithm pointers — presumably the pentium tables, as
   Lakemont defines none of its own above — and the closing "};"), and the
   original listing's line numbers are fused into the text.  Restore from
   upstream GCC before building.  */
410 struct processor_costs lakemont_cost = {
411 COSTS_N_INSNS (1), /* cost of an add instruction */
412 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
413 COSTS_N_INSNS (1), /* variable shift costs */
414 COSTS_N_INSNS (1), /* constant shift costs */
415 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
416 COSTS_N_INSNS (11), /* HI */
417 COSTS_N_INSNS (11), /* SI */
418 COSTS_N_INSNS (11), /* DI */
419 COSTS_N_INSNS (11)}, /* other */
420 0, /* cost of multiply per each bit set */
421 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
422 COSTS_N_INSNS (25), /* HI */
423 COSTS_N_INSNS (25), /* SI */
424 COSTS_N_INSNS (25), /* DI */
425 COSTS_N_INSNS (25)}, /* other */
426 COSTS_N_INSNS (3), /* cost of movsx */
427 COSTS_N_INSNS (2), /* cost of movzx */
428 8, /* "large" insn */
430 6, /* cost for loading QImode using movzbl */
431 {2, 4, 2}, /* cost of loading integer registers
432 in QImode, HImode and SImode.
433 Relative to reg-reg move (2). */
434 {2, 4, 2}, /* cost of storing integer registers */
435 2, /* cost of reg,reg fld/fst */
436 {2, 2, 6}, /* cost of loading fp registers
437 in SFmode, DFmode and XFmode */
438 {4, 4, 6}, /* cost of storing fp registers
439 in SFmode, DFmode and XFmode */
440 8, /* cost of moving MMX register */
441 {8, 8}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {8, 8}, /* cost of storing MMX registers
444 in SImode and DImode */
445 2, /* cost of moving SSE register */
446 {4, 8, 16}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {4, 8, 16}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 3, /* MMX or SSE register to integer */
451 8, /* size of l1 cache. */
452 8, /* size of l2 cache */
453 0, /* size of prefetch block */
454 0, /* number of parallel prefetches */
456 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
457 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
458 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
459 COSTS_N_INSNS (1), /* cost of FABS instruction. */
460 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
461 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
464 1, /* scalar_stmt_cost. */
465 1, /* scalar load_cost. */
466 1, /* scalar_store_cost. */
467 1, /* vec_stmt_cost. */
468 1, /* vec_to_scalar_cost. */
469 1, /* scalar_to_vec_cost. */
470 1, /* vec_align_load_cost. */
471 2, /* vec_unalign_load_cost. */
472 1, /* vec_store_cost. */
473 3, /* cond_taken_branch_cost. */
474 1, /* cond_not_taken_branch_cost. */
477 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
478 (we ensure the alignment). For small blocks inline loop is still a
479 noticeable win, for bigger blocks either rep movsl or rep movsb is
480 way to go. Rep movsb has apparently more expensive startup time in CPU,
481 but after 4K the difference is down in the noise. */
482 static stringop_algs pentiumpro_memcpy[2] = {
483 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
484 {8192, rep_prefix_4_byte, false},
485 {-1, rep_prefix_1_byte, false}}},
486 DUMMY_STRINGOP_ALGS};
487 static stringop_algs pentiumpro_memset[2] = {
488 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
489 {8192, rep_prefix_4_byte, false},
490 {-1, libcall, false}}},
491 DUMMY_STRINGOP_ALGS};
/* NOTE(review): this copy has dropped source lines inside this positional
   initializer (at least the MOVE_RATIO entry, the branch-cost entry, the
   pentiumpro_memcpy / pentiumpro_memset pointers, and the closing "};"),
   and the original listing's line numbers are fused into the text.
   Restore from upstream GCC before building.  */
493 struct processor_costs pentiumpro_cost = {
494 COSTS_N_INSNS (1), /* cost of an add instruction */
495 COSTS_N_INSNS (1), /* cost of a lea instruction */
496 COSTS_N_INSNS (1), /* variable shift costs */
497 COSTS_N_INSNS (1), /* constant shift costs */
498 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
499 COSTS_N_INSNS (4), /* HI */
500 COSTS_N_INSNS (4), /* SI */
501 COSTS_N_INSNS (4), /* DI */
502 COSTS_N_INSNS (4)}, /* other */
503 0, /* cost of multiply per each bit set */
504 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
505 COSTS_N_INSNS (17), /* HI */
506 COSTS_N_INSNS (17), /* SI */
507 COSTS_N_INSNS (17), /* DI */
508 COSTS_N_INSNS (17)}, /* other */
509 COSTS_N_INSNS (1), /* cost of movsx */
510 COSTS_N_INSNS (1), /* cost of movzx */
511 8, /* "large" insn */
513 2, /* cost for loading QImode using movzbl */
514 {4, 4, 4}, /* cost of loading integer registers
515 in QImode, HImode and SImode.
516 Relative to reg-reg move (2). */
517 {2, 2, 2}, /* cost of storing integer registers */
518 2, /* cost of reg,reg fld/fst */
519 {2, 2, 6}, /* cost of loading fp registers
520 in SFmode, DFmode and XFmode */
521 {4, 4, 6}, /* cost of storing fp registers
522 in SFmode, DFmode and XFmode */
523 2, /* cost of moving MMX register */
524 {2, 2}, /* cost of loading MMX registers
525 in SImode and DImode */
526 {2, 2}, /* cost of storing MMX registers
527 in SImode and DImode */
528 2, /* cost of moving SSE register */
529 {2, 2, 8}, /* cost of loading SSE registers
530 in SImode, DImode and TImode */
531 {2, 2, 8}, /* cost of storing SSE registers
532 in SImode, DImode and TImode */
533 3, /* MMX or SSE register to integer */
534 8, /* size of l1 cache. */
535 256, /* size of l2 cache */
536 32, /* size of prefetch block */
537 6, /* number of parallel prefetches */
539 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
540 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
541 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
542 COSTS_N_INSNS (2), /* cost of FABS instruction. */
543 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
544 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
547 1, /* scalar_stmt_cost. */
548 1, /* scalar load_cost. */
549 1, /* scalar_store_cost. */
550 1, /* vec_stmt_cost. */
551 1, /* vec_to_scalar_cost. */
552 1, /* scalar_to_vec_cost. */
553 1, /* vec_align_load_cost. */
554 2, /* vec_unalign_load_cost. */
555 1, /* vec_store_cost. */
556 3, /* cond_taken_branch_cost. */
557 1, /* cond_not_taken_branch_cost. */
560 static stringop_algs geode_memcpy[2] = {
561 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
562 DUMMY_STRINGOP_ALGS};
563 static stringop_algs geode_memset[2] = {
564 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
565 DUMMY_STRINGOP_ALGS};
/* NOTE(review): this copy has dropped source lines inside this positional
   initializer (at least the MOVE_RATIO entry, the branch-cost entry, the
   geode_memcpy / geode_memset pointers, and the closing "};"), and the
   original listing's line numbers are fused into the text.  Restore from
   upstream GCC before building.  */
567 struct processor_costs geode_cost = {
568 COSTS_N_INSNS (1), /* cost of an add instruction */
569 COSTS_N_INSNS (1), /* cost of a lea instruction */
570 COSTS_N_INSNS (2), /* variable shift costs */
571 COSTS_N_INSNS (1), /* constant shift costs */
572 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
573 COSTS_N_INSNS (4), /* HI */
574 COSTS_N_INSNS (7), /* SI */
575 COSTS_N_INSNS (7), /* DI */
576 COSTS_N_INSNS (7)}, /* other */
577 0, /* cost of multiply per each bit set */
578 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
579 COSTS_N_INSNS (23), /* HI */
580 COSTS_N_INSNS (39), /* SI */
581 COSTS_N_INSNS (39), /* DI */
582 COSTS_N_INSNS (39)}, /* other */
583 COSTS_N_INSNS (1), /* cost of movsx */
584 COSTS_N_INSNS (1), /* cost of movzx */
585 8, /* "large" insn */
587 1, /* cost for loading QImode using movzbl */
588 {1, 1, 1}, /* cost of loading integer registers
589 in QImode, HImode and SImode.
590 Relative to reg-reg move (2). */
591 {1, 1, 1}, /* cost of storing integer registers */
592 1, /* cost of reg,reg fld/fst */
593 {1, 1, 1}, /* cost of loading fp registers
594 in SFmode, DFmode and XFmode */
595 {4, 6, 6}, /* cost of storing fp registers
596 in SFmode, DFmode and XFmode */
598 2, /* cost of moving MMX register */
599 {2, 2}, /* cost of loading MMX registers
600 in SImode and DImode */
601 {2, 2}, /* cost of storing MMX registers
602 in SImode and DImode */
603 2, /* cost of moving SSE register */
604 {2, 2, 8}, /* cost of loading SSE registers
605 in SImode, DImode and TImode */
606 {2, 2, 8}, /* cost of storing SSE registers
607 in SImode, DImode and TImode */
608 3, /* MMX or SSE register to integer */
609 64, /* size of l1 cache. */
610 128, /* size of l2 cache. */
611 32, /* size of prefetch block */
612 1, /* number of parallel prefetches */
614 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
615 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
616 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
617 COSTS_N_INSNS (1), /* cost of FABS instruction. */
618 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
619 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
622 1, /* scalar_stmt_cost. */
623 1, /* scalar load_cost. */
624 1, /* scalar_store_cost. */
625 1, /* vec_stmt_cost. */
626 1, /* vec_to_scalar_cost. */
627 1, /* scalar_to_vec_cost. */
628 1, /* vec_align_load_cost. */
629 2, /* vec_unalign_load_cost. */
630 1, /* vec_store_cost. */
631 3, /* cond_taken_branch_cost. */
632 1, /* cond_not_taken_branch_cost. */
635 static stringop_algs k6_memcpy[2] = {
636 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
637 DUMMY_STRINGOP_ALGS};
638 static stringop_algs k6_memset[2] = {
639 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
640 DUMMY_STRINGOP_ALGS};
/* NOTE(review): this copy has dropped source lines inside this positional
   initializer (at least the MOVE_RATIO entry, the branch-cost entry, the
   k6_memcpy / k6_memset pointers, and the closing "};"), and the original
   listing's line numbers are fused into the text.  Restore from upstream
   GCC before building.  */
642 struct processor_costs k6_cost = {
643 COSTS_N_INSNS (1), /* cost of an add instruction */
644 COSTS_N_INSNS (2), /* cost of a lea instruction */
645 COSTS_N_INSNS (1), /* variable shift costs */
646 COSTS_N_INSNS (1), /* constant shift costs */
647 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
648 COSTS_N_INSNS (3), /* HI */
649 COSTS_N_INSNS (3), /* SI */
650 COSTS_N_INSNS (3), /* DI */
651 COSTS_N_INSNS (3)}, /* other */
652 0, /* cost of multiply per each bit set */
653 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
654 COSTS_N_INSNS (18), /* HI */
655 COSTS_N_INSNS (18), /* SI */
656 COSTS_N_INSNS (18), /* DI */
657 COSTS_N_INSNS (18)}, /* other */
658 COSTS_N_INSNS (2), /* cost of movsx */
659 COSTS_N_INSNS (2), /* cost of movzx */
660 8, /* "large" insn */
662 3, /* cost for loading QImode using movzbl */
663 {4, 5, 4}, /* cost of loading integer registers
664 in QImode, HImode and SImode.
665 Relative to reg-reg move (2). */
666 {2, 3, 2}, /* cost of storing integer registers */
667 4, /* cost of reg,reg fld/fst */
668 {6, 6, 6}, /* cost of loading fp registers
669 in SFmode, DFmode and XFmode */
670 {4, 4, 4}, /* cost of storing fp registers
671 in SFmode, DFmode and XFmode */
672 2, /* cost of moving MMX register */
673 {2, 2}, /* cost of loading MMX registers
674 in SImode and DImode */
675 {2, 2}, /* cost of storing MMX registers
676 in SImode and DImode */
677 2, /* cost of moving SSE register */
678 {2, 2, 8}, /* cost of loading SSE registers
679 in SImode, DImode and TImode */
680 {2, 2, 8}, /* cost of storing SSE registers
681 in SImode, DImode and TImode */
682 6, /* MMX or SSE register to integer */
683 32, /* size of l1 cache. */
684 32, /* size of l2 cache. Some models
685 have integrated l2 cache, but
686 optimizing for k6 is not important
687 enough to worry about that. */
688 32, /* size of prefetch block */
689 1, /* number of parallel prefetches */
691 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
692 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
693 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
694 COSTS_N_INSNS (2), /* cost of FABS instruction. */
695 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
696 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
699 1, /* scalar_stmt_cost. */
700 1, /* scalar load_cost. */
701 1, /* scalar_store_cost. */
702 1, /* vec_stmt_cost. */
703 1, /* vec_to_scalar_cost. */
704 1, /* scalar_to_vec_cost. */
705 1, /* vec_align_load_cost. */
706 2, /* vec_unalign_load_cost. */
707 1, /* vec_store_cost. */
708 3, /* cond_taken_branch_cost. */
709 1, /* cond_not_taken_branch_cost. */
712 /* For some reason, Athlon deals better with REP prefix (relative to loops)
713 compared to K8. Alignment becomes important after 8 bytes for memcpy and
714 128 bytes for memset. */
715 static stringop_algs athlon_memcpy[2] = {
716 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
717 DUMMY_STRINGOP_ALGS};
718 static stringop_algs athlon_memset[2] = {
719 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
720 DUMMY_STRINGOP_ALGS};
/* NOTE(review): this copy has dropped source lines inside this positional
   initializer (at least the MOVE_RATIO entry, the branch-cost entry, the
   athlon_memcpy / athlon_memset pointers, and the closing "};"), and the
   original listing's line numbers are fused into the text.  Restore from
   upstream GCC before building.  */
722 struct processor_costs athlon_cost = {
723 COSTS_N_INSNS (1), /* cost of an add instruction */
724 COSTS_N_INSNS (2), /* cost of a lea instruction */
725 COSTS_N_INSNS (1), /* variable shift costs */
726 COSTS_N_INSNS (1), /* constant shift costs */
727 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
728 COSTS_N_INSNS (5), /* HI */
729 COSTS_N_INSNS (5), /* SI */
730 COSTS_N_INSNS (5), /* DI */
731 COSTS_N_INSNS (5)}, /* other */
732 0, /* cost of multiply per each bit set */
733 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
734 COSTS_N_INSNS (26), /* HI */
735 COSTS_N_INSNS (42), /* SI */
736 COSTS_N_INSNS (74), /* DI */
737 COSTS_N_INSNS (74)}, /* other */
738 COSTS_N_INSNS (1), /* cost of movsx */
739 COSTS_N_INSNS (1), /* cost of movzx */
740 8, /* "large" insn */
742 4, /* cost for loading QImode using movzbl */
743 {3, 4, 3}, /* cost of loading integer registers
744 in QImode, HImode and SImode.
745 Relative to reg-reg move (2). */
746 {3, 4, 3}, /* cost of storing integer registers */
747 4, /* cost of reg,reg fld/fst */
748 {4, 4, 12}, /* cost of loading fp registers
749 in SFmode, DFmode and XFmode */
750 {6, 6, 8}, /* cost of storing fp registers
751 in SFmode, DFmode and XFmode */
752 2, /* cost of moving MMX register */
753 {4, 4}, /* cost of loading MMX registers
754 in SImode and DImode */
755 {4, 4}, /* cost of storing MMX registers
756 in SImode and DImode */
757 2, /* cost of moving SSE register */
758 {4, 4, 6}, /* cost of loading SSE registers
759 in SImode, DImode and TImode */
760 {4, 4, 5}, /* cost of storing SSE registers
761 in SImode, DImode and TImode */
762 5, /* MMX or SSE register to integer */
763 64, /* size of l1 cache. */
764 256, /* size of l2 cache. */
765 64, /* size of prefetch block */
766 6, /* number of parallel prefetches */
768 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
769 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
770 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
771 COSTS_N_INSNS (2), /* cost of FABS instruction. */
772 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
773 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
776 1, /* scalar_stmt_cost. */
777 1, /* scalar load_cost. */
778 1, /* scalar_store_cost. */
779 1, /* vec_stmt_cost. */
780 1, /* vec_to_scalar_cost. */
781 1, /* scalar_to_vec_cost. */
782 1, /* vec_align_load_cost. */
783 2, /* vec_unalign_load_cost. */
784 1, /* vec_store_cost. */
785 3, /* cond_taken_branch_cost. */
786 1, /* cond_not_taken_branch_cost. */
789 /* K8 has optimized REP instruction for medium sized blocks, but for very
790 small blocks it is better to use loop. For large blocks, libcall can
791 do nontemporary accesses and beat inline considerably. */
792 static stringop_algs k8_memcpy[2] = {
793 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
794 {-1, rep_prefix_4_byte, false}}},
795 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
796 {-1, libcall, false}}}};
797 static stringop_algs k8_memset[2] = {
798 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
799 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
800 {libcall, {{48, unrolled_loop, false},
801 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* NOTE(review): this copy has dropped source lines inside this positional
   initializer (at least the MOVE_RATIO entry, the tail of the prefetch
   comment — its terminating "*/" is re-added below — the branch-cost
   entry, the k8_memcpy / k8_memset pointers, and the closing "};"), and
   the original listing's line numbers are fused into the text.  Restore
   from upstream GCC before building.  */
803 struct processor_costs k8_cost = {
804 COSTS_N_INSNS (1), /* cost of an add instruction */
805 COSTS_N_INSNS (2), /* cost of a lea instruction */
806 COSTS_N_INSNS (1), /* variable shift costs */
807 COSTS_N_INSNS (1), /* constant shift costs */
808 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
809 COSTS_N_INSNS (4), /* HI */
810 COSTS_N_INSNS (3), /* SI */
811 COSTS_N_INSNS (4), /* DI */
812 COSTS_N_INSNS (5)}, /* other */
813 0, /* cost of multiply per each bit set */
814 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
815 COSTS_N_INSNS (26), /* HI */
816 COSTS_N_INSNS (42), /* SI */
817 COSTS_N_INSNS (74), /* DI */
818 COSTS_N_INSNS (74)}, /* other */
819 COSTS_N_INSNS (1), /* cost of movsx */
820 COSTS_N_INSNS (1), /* cost of movzx */
821 8, /* "large" insn */
823 4, /* cost for loading QImode using movzbl */
824 {3, 4, 3}, /* cost of loading integer registers
825 in QImode, HImode and SImode.
826 Relative to reg-reg move (2). */
827 {3, 4, 3}, /* cost of storing integer registers */
828 4, /* cost of reg,reg fld/fst */
829 {4, 4, 12}, /* cost of loading fp registers
830 in SFmode, DFmode and XFmode */
831 {6, 6, 8}, /* cost of storing fp registers
832 in SFmode, DFmode and XFmode */
833 2, /* cost of moving MMX register */
834 {3, 3}, /* cost of loading MMX registers
835 in SImode and DImode */
836 {4, 4}, /* cost of storing MMX registers
837 in SImode and DImode */
838 2, /* cost of moving SSE register */
839 {4, 3, 6}, /* cost of loading SSE registers
840 in SImode, DImode and TImode */
841 {4, 4, 5}, /* cost of storing SSE registers
842 in SImode, DImode and TImode */
843 5, /* MMX or SSE register to integer */
844 64, /* size of l1 cache. */
845 512, /* size of l2 cache. */
846 64, /* size of prefetch block */
847 /* New AMD processors never drop prefetches; if they cannot be performed
848 immediately, they are queued. We set number of simultaneous prefetches
849 to a large constant to reflect this (it probably is not a good idea not
850 to limit number of prefetches at all, as their execution also takes some
time).  */
852 100, /* number of parallel prefetches */
854 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
855 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
856 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
857 COSTS_N_INSNS (2), /* cost of FABS instruction. */
858 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
859 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
863 4, /* scalar_stmt_cost. */
864 2, /* scalar load_cost. */
865 2, /* scalar_store_cost. */
866 5, /* vec_stmt_cost. */
867 0, /* vec_to_scalar_cost. */
868 2, /* scalar_to_vec_cost. */
869 2, /* vec_align_load_cost. */
870 3, /* vec_unalign_load_cost. */
871 3, /* vec_store_cost. */
872 3, /* cond_taken_branch_cost. */
873 2, /* cond_not_taken_branch_cost. */
876 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
877 very small blocks it is better to use loop. For large blocks, libcall can
878 do nontemporary accesses and beat inline considerably. */
/* NOTE(review): each {N, alg, noalign} triple appears to cover block sizes
   up to N, with {-1, ...} as the final catch-all; element [0] vs [1]
   presumably selects 32- vs 64-bit codegen -- confirm against the
   consumers of stringop_algs.  */
static stringop_algs amdfam10_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
/* Corresponding memset strategy table for AMDFAM10.  */
static stringop_algs amdfam10_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
889 struct processor_costs amdfam10_cost = {
890 COSTS_N_INSNS (1), /* cost of an add instruction */
891 COSTS_N_INSNS (2), /* cost of a lea instruction */
892 COSTS_N_INSNS (1), /* variable shift costs */
893 COSTS_N_INSNS (1), /* constant shift costs */
894 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
895 COSTS_N_INSNS (4), /* HI */
896 COSTS_N_INSNS (3), /* SI */
897 COSTS_N_INSNS (4), /* DI */
898 COSTS_N_INSNS (5)}, /* other */
899 0, /* cost of multiply per each bit set */
900 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
901 COSTS_N_INSNS (35), /* HI */
902 COSTS_N_INSNS (51), /* SI */
903 COSTS_N_INSNS (83), /* DI */
904 COSTS_N_INSNS (83)}, /* other */
905 COSTS_N_INSNS (1), /* cost of movsx */
906 COSTS_N_INSNS (1), /* cost of movzx */
907 8, /* "large" insn */
909 4, /* cost for loading QImode using movzbl */
910 {3, 4, 3}, /* cost of loading integer registers
911 in QImode, HImode and SImode.
912 Relative to reg-reg move (2). */
913 {3, 4, 3}, /* cost of storing integer registers */
914 4, /* cost of reg,reg fld/fst */
915 {4, 4, 12}, /* cost of loading fp registers
916 in SFmode, DFmode and XFmode */
917 {6, 6, 8}, /* cost of storing fp registers
918 in SFmode, DFmode and XFmode */
919 2, /* cost of moving MMX register */
920 {3, 3}, /* cost of loading MMX registers
921 in SImode and DImode */
922 {4, 4}, /* cost of storing MMX registers
923 in SImode and DImode */
924 2, /* cost of moving SSE register */
925 {4, 4, 3}, /* cost of loading SSE registers
926 in SImode, DImode and TImode */
927 {4, 4, 5}, /* cost of storing SSE registers
928 in SImode, DImode and TImode */
929 3, /* MMX or SSE register to integer */
931 MOVD reg64, xmmreg Double FSTORE 4
932 MOVD reg32, xmmreg Double FSTORE 4
934 MOVD reg64, xmmreg Double FADD 3
936 MOVD reg32, xmmreg Double FADD 3
938 64, /* size of l1 cache. */
939 512, /* size of l2 cache. */
940 64, /* size of prefetch block */
941 /* New AMD processors never drop prefetches; if they cannot be performed
942 immediately, they are queued. We set number of simultaneous prefetches
943 to a large constant to reflect this (it probably is not a good idea not
944 to limit number of prefetches at all, as their execution also takes some
946 100, /* number of parallel prefetches */
948 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
949 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
950 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
951 COSTS_N_INSNS (2), /* cost of FABS instruction. */
952 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
953 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
957 4, /* scalar_stmt_cost. */
958 2, /* scalar load_cost. */
959 2, /* scalar_store_cost. */
960 6, /* vec_stmt_cost. */
961 0, /* vec_to_scalar_cost. */
962 2, /* scalar_to_vec_cost. */
963 2, /* vec_align_load_cost. */
964 2, /* vec_unalign_load_cost. */
965 2, /* vec_store_cost. */
966 2, /* cond_taken_branch_cost. */
967 1, /* cond_not_taken_branch_cost. */
970 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
971 very small blocks it is better to use loop. For large blocks, libcall
972 can do nontemporary accesses and beat inline considerably. */
/* Stringop strategy tables for BDVER1 ([0]/[1] presumably 32-/64-bit
   codegen -- NOTE(review): confirm index convention).  */
static stringop_algs bdver1_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver1_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
984 const struct processor_costs bdver1_cost = {
985 COSTS_N_INSNS (1), /* cost of an add instruction */
986 COSTS_N_INSNS (1), /* cost of a lea instruction */
987 COSTS_N_INSNS (1), /* variable shift costs */
988 COSTS_N_INSNS (1), /* constant shift costs */
989 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
990 COSTS_N_INSNS (4), /* HI */
991 COSTS_N_INSNS (4), /* SI */
992 COSTS_N_INSNS (6), /* DI */
993 COSTS_N_INSNS (6)}, /* other */
994 0, /* cost of multiply per each bit set */
995 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
996 COSTS_N_INSNS (35), /* HI */
997 COSTS_N_INSNS (51), /* SI */
998 COSTS_N_INSNS (83), /* DI */
999 COSTS_N_INSNS (83)}, /* other */
1000 COSTS_N_INSNS (1), /* cost of movsx */
1001 COSTS_N_INSNS (1), /* cost of movzx */
1002 8, /* "large" insn */
1004 4, /* cost for loading QImode using movzbl */
1005 {5, 5, 4}, /* cost of loading integer registers
1006 in QImode, HImode and SImode.
1007 Relative to reg-reg move (2). */
1008 {4, 4, 4}, /* cost of storing integer registers */
1009 2, /* cost of reg,reg fld/fst */
1010 {5, 5, 12}, /* cost of loading fp registers
1011 in SFmode, DFmode and XFmode */
1012 {4, 4, 8}, /* cost of storing fp registers
1013 in SFmode, DFmode and XFmode */
1014 2, /* cost of moving MMX register */
1015 {4, 4}, /* cost of loading MMX registers
1016 in SImode and DImode */
1017 {4, 4}, /* cost of storing MMX registers
1018 in SImode and DImode */
1019 2, /* cost of moving SSE register */
1020 {4, 4, 4}, /* cost of loading SSE registers
1021 in SImode, DImode and TImode */
1022 {4, 4, 4}, /* cost of storing SSE registers
1023 in SImode, DImode and TImode */
1024 2, /* MMX or SSE register to integer */
1026 MOVD reg64, xmmreg Double FSTORE 4
1027 MOVD reg32, xmmreg Double FSTORE 4
1029 MOVD reg64, xmmreg Double FADD 3
1031 MOVD reg32, xmmreg Double FADD 3
1033 16, /* size of l1 cache. */
1034 2048, /* size of l2 cache. */
1035 64, /* size of prefetch block */
1036 /* New AMD processors never drop prefetches; if they cannot be performed
1037 immediately, they are queued. We set number of simultaneous prefetches
1038 to a large constant to reflect this (it probably is not a good idea not
1039 to limit number of prefetches at all, as their execution also takes some
1041 100, /* number of parallel prefetches */
1042 2, /* Branch cost */
1043 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1044 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1045 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1046 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1047 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1048 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1052 6, /* scalar_stmt_cost. */
1053 4, /* scalar load_cost. */
1054 4, /* scalar_store_cost. */
1055 6, /* vec_stmt_cost. */
1056 0, /* vec_to_scalar_cost. */
1057 2, /* scalar_to_vec_cost. */
1058 4, /* vec_align_load_cost. */
1059 4, /* vec_unalign_load_cost. */
1060 4, /* vec_store_cost. */
1061 4, /* cond_taken_branch_cost. */
1062 2, /* cond_not_taken_branch_cost. */
1065 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
1066 very small blocks it is better to use loop. For large blocks, libcall
1067 can do nontemporary accesses and beat inline considerably. */
/* Stringop strategy tables for BDVER2 ([0]/[1] presumably 32-/64-bit
   codegen -- NOTE(review): confirm index convention).  */
static stringop_algs bdver2_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver2_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
1080 const struct processor_costs bdver2_cost = {
1081 COSTS_N_INSNS (1), /* cost of an add instruction */
1082 COSTS_N_INSNS (1), /* cost of a lea instruction */
1083 COSTS_N_INSNS (1), /* variable shift costs */
1084 COSTS_N_INSNS (1), /* constant shift costs */
1085 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1086 COSTS_N_INSNS (4), /* HI */
1087 COSTS_N_INSNS (4), /* SI */
1088 COSTS_N_INSNS (6), /* DI */
1089 COSTS_N_INSNS (6)}, /* other */
1090 0, /* cost of multiply per each bit set */
1091 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1092 COSTS_N_INSNS (35), /* HI */
1093 COSTS_N_INSNS (51), /* SI */
1094 COSTS_N_INSNS (83), /* DI */
1095 COSTS_N_INSNS (83)}, /* other */
1096 COSTS_N_INSNS (1), /* cost of movsx */
1097 COSTS_N_INSNS (1), /* cost of movzx */
1098 8, /* "large" insn */
1100 4, /* cost for loading QImode using movzbl */
1101 {5, 5, 4}, /* cost of loading integer registers
1102 in QImode, HImode and SImode.
1103 Relative to reg-reg move (2). */
1104 {4, 4, 4}, /* cost of storing integer registers */
1105 2, /* cost of reg,reg fld/fst */
1106 {5, 5, 12}, /* cost of loading fp registers
1107 in SFmode, DFmode and XFmode */
1108 {4, 4, 8}, /* cost of storing fp registers
1109 in SFmode, DFmode and XFmode */
1110 2, /* cost of moving MMX register */
1111 {4, 4}, /* cost of loading MMX registers
1112 in SImode and DImode */
1113 {4, 4}, /* cost of storing MMX registers
1114 in SImode and DImode */
1115 2, /* cost of moving SSE register */
1116 {4, 4, 4}, /* cost of loading SSE registers
1117 in SImode, DImode and TImode */
1118 {4, 4, 4}, /* cost of storing SSE registers
1119 in SImode, DImode and TImode */
1120 2, /* MMX or SSE register to integer */
1122 MOVD reg64, xmmreg Double FSTORE 4
1123 MOVD reg32, xmmreg Double FSTORE 4
1125 MOVD reg64, xmmreg Double FADD 3
1127 MOVD reg32, xmmreg Double FADD 3
1129 16, /* size of l1 cache. */
1130 2048, /* size of l2 cache. */
1131 64, /* size of prefetch block */
1132 /* New AMD processors never drop prefetches; if they cannot be performed
1133 immediately, they are queued. We set number of simultaneous prefetches
1134 to a large constant to reflect this (it probably is not a good idea not
1135 to limit number of prefetches at all, as their execution also takes some
1137 100, /* number of parallel prefetches */
1138 2, /* Branch cost */
1139 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1140 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1141 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1142 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1143 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1144 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1148 6, /* scalar_stmt_cost. */
1149 4, /* scalar load_cost. */
1150 4, /* scalar_store_cost. */
1151 6, /* vec_stmt_cost. */
1152 0, /* vec_to_scalar_cost. */
1153 2, /* scalar_to_vec_cost. */
1154 4, /* vec_align_load_cost. */
1155 4, /* vec_unalign_load_cost. */
1156 4, /* vec_store_cost. */
1157 4, /* cond_taken_branch_cost. */
1158 2, /* cond_not_taken_branch_cost. */
1162 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1163 very small blocks it is better to use loop. For large blocks, libcall
1164 can do nontemporary accesses and beat inline considerably. */
/* Stringop strategy tables for BDVER3 ([0]/[1] presumably 32-/64-bit
   codegen -- NOTE(review): confirm index convention).  */
static stringop_algs bdver3_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver3_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
1175 struct processor_costs bdver3_cost = {
1176 COSTS_N_INSNS (1), /* cost of an add instruction */
1177 COSTS_N_INSNS (1), /* cost of a lea instruction */
1178 COSTS_N_INSNS (1), /* variable shift costs */
1179 COSTS_N_INSNS (1), /* constant shift costs */
1180 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1181 COSTS_N_INSNS (4), /* HI */
1182 COSTS_N_INSNS (4), /* SI */
1183 COSTS_N_INSNS (6), /* DI */
1184 COSTS_N_INSNS (6)}, /* other */
1185 0, /* cost of multiply per each bit set */
1186 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1187 COSTS_N_INSNS (35), /* HI */
1188 COSTS_N_INSNS (51), /* SI */
1189 COSTS_N_INSNS (83), /* DI */
1190 COSTS_N_INSNS (83)}, /* other */
1191 COSTS_N_INSNS (1), /* cost of movsx */
1192 COSTS_N_INSNS (1), /* cost of movzx */
1193 8, /* "large" insn */
1195 4, /* cost for loading QImode using movzbl */
1196 {5, 5, 4}, /* cost of loading integer registers
1197 in QImode, HImode and SImode.
1198 Relative to reg-reg move (2). */
1199 {4, 4, 4}, /* cost of storing integer registers */
1200 2, /* cost of reg,reg fld/fst */
1201 {5, 5, 12}, /* cost of loading fp registers
1202 in SFmode, DFmode and XFmode */
1203 {4, 4, 8}, /* cost of storing fp registers
1204 in SFmode, DFmode and XFmode */
1205 2, /* cost of moving MMX register */
1206 {4, 4}, /* cost of loading MMX registers
1207 in SImode and DImode */
1208 {4, 4}, /* cost of storing MMX registers
1209 in SImode and DImode */
1210 2, /* cost of moving SSE register */
1211 {4, 4, 4}, /* cost of loading SSE registers
1212 in SImode, DImode and TImode */
1213 {4, 4, 4}, /* cost of storing SSE registers
1214 in SImode, DImode and TImode */
1215 2, /* MMX or SSE register to integer */
1216 16, /* size of l1 cache. */
1217 2048, /* size of l2 cache. */
1218 64, /* size of prefetch block */
1219 /* New AMD processors never drop prefetches; if they cannot be performed
1220 immediately, they are queued. We set number of simultaneous prefetches
1221 to a large constant to reflect this (it probably is not a good idea not
1222 to limit number of prefetches at all, as their execution also takes some
1224 100, /* number of parallel prefetches */
1225 2, /* Branch cost */
1226 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1227 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1228 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1229 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1230 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1231 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1235 6, /* scalar_stmt_cost. */
1236 4, /* scalar load_cost. */
1237 4, /* scalar_store_cost. */
1238 6, /* vec_stmt_cost. */
1239 0, /* vec_to_scalar_cost. */
1240 2, /* scalar_to_vec_cost. */
1241 4, /* vec_align_load_cost. */
1242 4, /* vec_unalign_load_cost. */
1243 4, /* vec_store_cost. */
1244 4, /* cond_taken_branch_cost. */
1245 2, /* cond_not_taken_branch_cost. */
1248 /* BDVER4 has optimized REP instruction for medium sized blocks, but for
1249 very small blocks it is better to use loop. For large blocks, libcall
1250 can do nontemporary accesses and beat inline considerably. */
/* Stringop strategy tables for BDVER4 ([0]/[1] presumably 32-/64-bit
   codegen -- NOTE(review): confirm index convention).  */
static stringop_algs bdver4_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver4_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
1261 struct processor_costs bdver4_cost = {
1262 COSTS_N_INSNS (1), /* cost of an add instruction */
1263 COSTS_N_INSNS (1), /* cost of a lea instruction */
1264 COSTS_N_INSNS (1), /* variable shift costs */
1265 COSTS_N_INSNS (1), /* constant shift costs */
1266 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1267 COSTS_N_INSNS (4), /* HI */
1268 COSTS_N_INSNS (4), /* SI */
1269 COSTS_N_INSNS (6), /* DI */
1270 COSTS_N_INSNS (6)}, /* other */
1271 0, /* cost of multiply per each bit set */
1272 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1273 COSTS_N_INSNS (35), /* HI */
1274 COSTS_N_INSNS (51), /* SI */
1275 COSTS_N_INSNS (83), /* DI */
1276 COSTS_N_INSNS (83)}, /* other */
1277 COSTS_N_INSNS (1), /* cost of movsx */
1278 COSTS_N_INSNS (1), /* cost of movzx */
1279 8, /* "large" insn */
1281 4, /* cost for loading QImode using movzbl */
1282 {5, 5, 4}, /* cost of loading integer registers
1283 in QImode, HImode and SImode.
1284 Relative to reg-reg move (2). */
1285 {4, 4, 4}, /* cost of storing integer registers */
1286 2, /* cost of reg,reg fld/fst */
1287 {5, 5, 12}, /* cost of loading fp registers
1288 in SFmode, DFmode and XFmode */
1289 {4, 4, 8}, /* cost of storing fp registers
1290 in SFmode, DFmode and XFmode */
1291 2, /* cost of moving MMX register */
1292 {4, 4}, /* cost of loading MMX registers
1293 in SImode and DImode */
1294 {4, 4}, /* cost of storing MMX registers
1295 in SImode and DImode */
1296 2, /* cost of moving SSE register */
1297 {4, 4, 4}, /* cost of loading SSE registers
1298 in SImode, DImode and TImode */
1299 {4, 4, 4}, /* cost of storing SSE registers
1300 in SImode, DImode and TImode */
1301 2, /* MMX or SSE register to integer */
1302 16, /* size of l1 cache. */
1303 2048, /* size of l2 cache. */
1304 64, /* size of prefetch block */
1305 /* New AMD processors never drop prefetches; if they cannot be performed
1306 immediately, they are queued. We set number of simultaneous prefetches
1307 to a large constant to reflect this (it probably is not a good idea not
1308 to limit number of prefetches at all, as their execution also takes some
1310 100, /* number of parallel prefetches */
1311 2, /* Branch cost */
1312 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1313 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1314 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1315 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1316 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1317 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1321 6, /* scalar_stmt_cost. */
1322 4, /* scalar load_cost. */
1323 4, /* scalar_store_cost. */
1324 6, /* vec_stmt_cost. */
1325 0, /* vec_to_scalar_cost. */
1326 2, /* scalar_to_vec_cost. */
1327 4, /* vec_align_load_cost. */
1328 4, /* vec_unalign_load_cost. */
1329 4, /* vec_store_cost. */
1330 4, /* cond_taken_branch_cost. */
1331 2, /* cond_not_taken_branch_cost. */
1335 /* ZNVER1 has optimized REP instruction for medium sized blocks, but for
1336 very small blocks it is better to use loop. For large blocks, libcall
1337 can do nontemporary accesses and beat inline considerably. */
/* Stringop strategy tables for ZNVER1 ([0]/[1] presumably 32-/64-bit
   codegen -- NOTE(review): confirm index convention).  */
static stringop_algs znver1_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs znver1_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
1348 struct processor_costs znver1_cost = {
1349 COSTS_N_INSNS (1), /* cost of an add instruction. */
1350 COSTS_N_INSNS (1), /* cost of a lea instruction. */
1351 COSTS_N_INSNS (1), /* variable shift costs. */
1352 COSTS_N_INSNS (1), /* constant shift costs. */
1353 {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */
1354 COSTS_N_INSNS (3), /* HI. */
1355 COSTS_N_INSNS (3), /* SI. */
1356 COSTS_N_INSNS (4), /* DI. */
1357 COSTS_N_INSNS (4)}, /* other. */
1358 0, /* cost of multiply per each bit
1360 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI. */
1361 COSTS_N_INSNS (35), /* HI. */
1362 COSTS_N_INSNS (51), /* SI. */
1363 COSTS_N_INSNS (83), /* DI. */
1364 COSTS_N_INSNS (83)}, /* other. */
1365 COSTS_N_INSNS (1), /* cost of movsx. */
1366 COSTS_N_INSNS (1), /* cost of movzx. */
1367 8, /* "large" insn. */
1368 9, /* MOVE_RATIO. */
1369 4, /* cost for loading QImode using
1371 {5, 5, 4}, /* cost of loading integer registers
1372 in QImode, HImode and SImode.
1373 Relative to reg-reg move (2). */
1374 {4, 4, 4}, /* cost of storing integer
1376 2, /* cost of reg,reg fld/fst. */
1377 {5, 5, 12}, /* cost of loading fp registers
1378 in SFmode, DFmode and XFmode. */
1379 {4, 4, 8}, /* cost of storing fp registers
1380 in SFmode, DFmode and XFmode. */
1381 2, /* cost of moving MMX register. */
1382 {4, 4}, /* cost of loading MMX registers
1383 in SImode and DImode. */
1384 {4, 4}, /* cost of storing MMX registers
1385 in SImode and DImode. */
1386 2, /* cost of moving SSE register. */
1387 {4, 4, 4}, /* cost of loading SSE registers
1388 in SImode, DImode and TImode. */
1389 {4, 4, 4}, /* cost of storing SSE registers
1390 in SImode, DImode and TImode. */
1391 2, /* MMX or SSE register to integer. */
1392 32, /* size of l1 cache. */
1393 512, /* size of l2 cache. */
1394 64, /* size of prefetch block. */
1395 /* New AMD processors never drop prefetches; if they cannot be performed
1396 immediately, they are queued. We set number of simultaneous prefetches
1397 to a large constant to reflect this (it probably is not a good idea not
1398 to limit number of prefetches at all, as their execution also takes some
1400 100, /* number of parallel prefetches. */
1401 2, /* Branch cost. */
1402 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1403 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1404 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1405 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1406 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1407 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1411 6, /* scalar_stmt_cost. */
1412 4, /* scalar load_cost. */
1413 4, /* scalar_store_cost. */
1414 6, /* vec_stmt_cost. */
1415 0, /* vec_to_scalar_cost. */
1416 2, /* scalar_to_vec_cost. */
1417 4, /* vec_align_load_cost. */
1418 4, /* vec_unalign_load_cost. */
1419 4, /* vec_store_cost. */
1420 4, /* cond_taken_branch_cost. */
1421 2, /* cond_not_taken_branch_cost. */
1424 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1425 very small blocks it is better to use loop. For large blocks, libcall can
1426 do nontemporary accesses and beat inline considerably. */
/* Stringop strategy tables for BTVER1 ([0]/[1] presumably 32-/64-bit
   codegen -- NOTE(review): confirm index convention).  */
static stringop_algs btver1_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs btver1_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
1437 const struct processor_costs btver1_cost = {
1438 COSTS_N_INSNS (1), /* cost of an add instruction */
1439 COSTS_N_INSNS (2), /* cost of a lea instruction */
1440 COSTS_N_INSNS (1), /* variable shift costs */
1441 COSTS_N_INSNS (1), /* constant shift costs */
1442 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1443 COSTS_N_INSNS (4), /* HI */
1444 COSTS_N_INSNS (3), /* SI */
1445 COSTS_N_INSNS (4), /* DI */
1446 COSTS_N_INSNS (5)}, /* other */
1447 0, /* cost of multiply per each bit set */
1448 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1449 COSTS_N_INSNS (35), /* HI */
1450 COSTS_N_INSNS (51), /* SI */
1451 COSTS_N_INSNS (83), /* DI */
1452 COSTS_N_INSNS (83)}, /* other */
1453 COSTS_N_INSNS (1), /* cost of movsx */
1454 COSTS_N_INSNS (1), /* cost of movzx */
1455 8, /* "large" insn */
1457 4, /* cost for loading QImode using movzbl */
1458 {3, 4, 3}, /* cost of loading integer registers
1459 in QImode, HImode and SImode.
1460 Relative to reg-reg move (2). */
1461 {3, 4, 3}, /* cost of storing integer registers */
1462 4, /* cost of reg,reg fld/fst */
1463 {4, 4, 12}, /* cost of loading fp registers
1464 in SFmode, DFmode and XFmode */
1465 {6, 6, 8}, /* cost of storing fp registers
1466 in SFmode, DFmode and XFmode */
1467 2, /* cost of moving MMX register */
1468 {3, 3}, /* cost of loading MMX registers
1469 in SImode and DImode */
1470 {4, 4}, /* cost of storing MMX registers
1471 in SImode and DImode */
1472 2, /* cost of moving SSE register */
1473 {4, 4, 3}, /* cost of loading SSE registers
1474 in SImode, DImode and TImode */
1475 {4, 4, 5}, /* cost of storing SSE registers
1476 in SImode, DImode and TImode */
1477 3, /* MMX or SSE register to integer */
1479 MOVD reg64, xmmreg Double FSTORE 4
1480 MOVD reg32, xmmreg Double FSTORE 4
1482 MOVD reg64, xmmreg Double FADD 3
1484 MOVD reg32, xmmreg Double FADD 3
1486 32, /* size of l1 cache. */
1487 512, /* size of l2 cache. */
1488 64, /* size of prefetch block */
1489 100, /* number of parallel prefetches */
1490 2, /* Branch cost */
1491 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1492 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1493 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1494 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1495 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1496 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1500 4, /* scalar_stmt_cost. */
1501 2, /* scalar load_cost. */
1502 2, /* scalar_store_cost. */
1503 6, /* vec_stmt_cost. */
1504 0, /* vec_to_scalar_cost. */
1505 2, /* scalar_to_vec_cost. */
1506 2, /* vec_align_load_cost. */
1507 2, /* vec_unalign_load_cost. */
1508 2, /* vec_store_cost. */
1509 2, /* cond_taken_branch_cost. */
1510 1, /* cond_not_taken_branch_cost. */
/* Stringop strategy tables for BTVER2 ([0]/[1] presumably 32-/64-bit
   codegen -- NOTE(review): confirm index convention).  */
static stringop_algs btver2_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs btver2_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
1523 const struct processor_costs btver2_cost = {
1524 COSTS_N_INSNS (1), /* cost of an add instruction */
1525 COSTS_N_INSNS (2), /* cost of a lea instruction */
1526 COSTS_N_INSNS (1), /* variable shift costs */
1527 COSTS_N_INSNS (1), /* constant shift costs */
1528 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1529 COSTS_N_INSNS (4), /* HI */
1530 COSTS_N_INSNS (3), /* SI */
1531 COSTS_N_INSNS (4), /* DI */
1532 COSTS_N_INSNS (5)}, /* other */
1533 0, /* cost of multiply per each bit set */
1534 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1535 COSTS_N_INSNS (35), /* HI */
1536 COSTS_N_INSNS (51), /* SI */
1537 COSTS_N_INSNS (83), /* DI */
1538 COSTS_N_INSNS (83)}, /* other */
1539 COSTS_N_INSNS (1), /* cost of movsx */
1540 COSTS_N_INSNS (1), /* cost of movzx */
1541 8, /* "large" insn */
1543 4, /* cost for loading QImode using movzbl */
1544 {3, 4, 3}, /* cost of loading integer registers
1545 in QImode, HImode and SImode.
1546 Relative to reg-reg move (2). */
1547 {3, 4, 3}, /* cost of storing integer registers */
1548 4, /* cost of reg,reg fld/fst */
1549 {4, 4, 12}, /* cost of loading fp registers
1550 in SFmode, DFmode and XFmode */
1551 {6, 6, 8}, /* cost of storing fp registers
1552 in SFmode, DFmode and XFmode */
1553 2, /* cost of moving MMX register */
1554 {3, 3}, /* cost of loading MMX registers
1555 in SImode and DImode */
1556 {4, 4}, /* cost of storing MMX registers
1557 in SImode and DImode */
1558 2, /* cost of moving SSE register */
1559 {4, 4, 3}, /* cost of loading SSE registers
1560 in SImode, DImode and TImode */
1561 {4, 4, 5}, /* cost of storing SSE registers
1562 in SImode, DImode and TImode */
1563 3, /* MMX or SSE register to integer */
1565 MOVD reg64, xmmreg Double FSTORE 4
1566 MOVD reg32, xmmreg Double FSTORE 4
1568 MOVD reg64, xmmreg Double FADD 3
1570 MOVD reg32, xmmreg Double FADD 3
1572 32, /* size of l1 cache. */
1573 2048, /* size of l2 cache. */
1574 64, /* size of prefetch block */
1575 100, /* number of parallel prefetches */
1576 2, /* Branch cost */
1577 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1578 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1579 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1580 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1581 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1582 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1585 4, /* scalar_stmt_cost. */
1586 2, /* scalar load_cost. */
1587 2, /* scalar_store_cost. */
1588 6, /* vec_stmt_cost. */
1589 0, /* vec_to_scalar_cost. */
1590 2, /* scalar_to_vec_cost. */
1591 2, /* vec_align_load_cost. */
1592 2, /* vec_unalign_load_cost. */
1593 2, /* vec_store_cost. */
1594 2, /* cond_taken_branch_cost. */
1595 1, /* cond_not_taken_branch_cost. */
/* Stringop strategy tables for Pentium 4.  The second element is
   DUMMY_STRINGOP_ALGS (presumably unused for this 32-bit tuning --
   NOTE(review): confirm).  */
static stringop_algs pentium4_memcpy[2] = {
  {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs pentium4_memset[2] = {
  {libcall, {{6, loop_1_byte, false}, {48, loop, false},
             {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
1607 struct processor_costs pentium4_cost = {
1608 COSTS_N_INSNS (1), /* cost of an add instruction */
1609 COSTS_N_INSNS (3), /* cost of a lea instruction */
1610 COSTS_N_INSNS (4), /* variable shift costs */
1611 COSTS_N_INSNS (4), /* constant shift costs */
1612 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1613 COSTS_N_INSNS (15), /* HI */
1614 COSTS_N_INSNS (15), /* SI */
1615 COSTS_N_INSNS (15), /* DI */
1616 COSTS_N_INSNS (15)}, /* other */
1617 0, /* cost of multiply per each bit set */
1618 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1619 COSTS_N_INSNS (56), /* HI */
1620 COSTS_N_INSNS (56), /* SI */
1621 COSTS_N_INSNS (56), /* DI */
1622 COSTS_N_INSNS (56)}, /* other */
1623 COSTS_N_INSNS (1), /* cost of movsx */
1624 COSTS_N_INSNS (1), /* cost of movzx */
1625 16, /* "large" insn */
1627 2, /* cost for loading QImode using movzbl */
1628 {4, 5, 4}, /* cost of loading integer registers
1629 in QImode, HImode and SImode.
1630 Relative to reg-reg move (2). */
1631 {2, 3, 2}, /* cost of storing integer registers */
1632 2, /* cost of reg,reg fld/fst */
1633 {2, 2, 6}, /* cost of loading fp registers
1634 in SFmode, DFmode and XFmode */
1635 {4, 4, 6}, /* cost of storing fp registers
1636 in SFmode, DFmode and XFmode */
1637 2, /* cost of moving MMX register */
1638 {2, 2}, /* cost of loading MMX registers
1639 in SImode and DImode */
1640 {2, 2}, /* cost of storing MMX registers
1641 in SImode and DImode */
1642 12, /* cost of moving SSE register */
1643 {12, 12, 12}, /* cost of loading SSE registers
1644 in SImode, DImode and TImode */
1645 {2, 2, 8}, /* cost of storing SSE registers
1646 in SImode, DImode and TImode */
1647 10, /* MMX or SSE register to integer */
1648 8, /* size of l1 cache. */
1649 256, /* size of l2 cache. */
1650 64, /* size of prefetch block */
1651 6, /* number of parallel prefetches */
1652 2, /* Branch cost */
1653 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1654 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1655 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1656 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1657 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1658 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1661 1, /* scalar_stmt_cost. */
1662 1, /* scalar load_cost. */
1663 1, /* scalar_store_cost. */
1664 1, /* vec_stmt_cost. */
1665 1, /* vec_to_scalar_cost. */
1666 1, /* scalar_to_vec_cost. */
1667 1, /* vec_align_load_cost. */
1668 2, /* vec_unalign_load_cost. */
1669 1, /* vec_store_cost. */
1670 3, /* cond_taken_branch_cost. */
1671 1, /* cond_not_taken_branch_cost. */
/* Stringop strategy tables for Nocona; unlike the Pentium 4 tables this
   provides a real 64-bit entry ([1]) as well -- NOTE(review): index
   convention assumed, confirm against stringop_algs consumers.  */
static stringop_algs nocona_memcpy[2] = {
  {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
  {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
             {100000, unrolled_loop, false}, {-1, libcall, false}}}};
static stringop_algs nocona_memset[2] = {
  {libcall, {{6, loop_1_byte, false}, {48, loop, false},
             {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{24, loop, false}, {64, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Instruction cost table for Nocona tuning (costs are relative to an
   add; see the note near the top of this file).  */
1686 struct processor_costs nocona_cost = {
1687 COSTS_N_INSNS (1), /* cost of an add instruction */
1688 COSTS_N_INSNS (1), /* cost of a lea instruction */
1689 COSTS_N_INSNS (1), /* variable shift costs */
1690 COSTS_N_INSNS (1), /* constant shift costs */
1691 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1692 COSTS_N_INSNS (10), /* HI */
1693 COSTS_N_INSNS (10), /* SI */
1694 COSTS_N_INSNS (10), /* DI */
1695 COSTS_N_INSNS (10)}, /* other */
1696 0, /* cost of multiply per each bit set */
1697 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1698 COSTS_N_INSNS (66), /* HI */
1699 COSTS_N_INSNS (66), /* SI */
1700 COSTS_N_INSNS (66), /* DI */
1701 COSTS_N_INSNS (66)}, /* other */
1702 COSTS_N_INSNS (1), /* cost of movsx */
1703 COSTS_N_INSNS (1), /* cost of movzx */
1704 16, /* "large" insn */
1705 17, /* MOVE_RATIO */
1706 4, /* cost for loading QImode using movzbl */
1707 {4, 4, 4}, /* cost of loading integer registers
1708 in QImode, HImode and SImode.
1709 Relative to reg-reg move (2). */
1710 {4, 4, 4}, /* cost of storing integer registers */
1711 3, /* cost of reg,reg fld/fst */
1712 {12, 12, 12}, /* cost of loading fp registers
1713 in SFmode, DFmode and XFmode */
1714 {4, 4, 4}, /* cost of storing fp registers
1715 in SFmode, DFmode and XFmode */
1716 6, /* cost of moving MMX register */
1717 {12, 12}, /* cost of loading MMX registers
1718 in SImode and DImode */
1719 {12, 12}, /* cost of storing MMX registers
1720 in SImode and DImode */
1721 6, /* cost of moving SSE register */
1722 {12, 12, 12}, /* cost of loading SSE registers
1723 in SImode, DImode and TImode */
1724 {12, 12, 12}, /* cost of storing SSE registers
1725 in SImode, DImode and TImode */
1726 8, /* MMX or SSE register to integer */
1727 8, /* size of l1 cache. */
1728 1024, /* size of l2 cache. */
1729 64, /* size of prefetch block */
1730 8, /* number of parallel prefetches */
1731 1, /* Branch cost */
1732 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1733 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1734 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1735 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1736 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1737 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
/* Vectorizer cost model entries follow.  */
1740 1, /* scalar_stmt_cost. */
1741 1, /* scalar load_cost. */
1742 1, /* scalar_store_cost. */
1743 1, /* vec_stmt_cost. */
1744 1, /* vec_to_scalar_cost. */
1745 1, /* scalar_to_vec_cost. */
1746 1, /* vec_align_load_cost. */
1747 2, /* vec_unalign_load_cost. */
1748 1, /* vec_store_cost. */
1749 3, /* cond_taken_branch_cost. */
1750 1, /* cond_not_taken_branch_cost. */
/* Inline-expansion strategies for memcpy/memset when tuning for Atom;
   same {size, alg, noalign} layout as the nocona tables above.  */
1753 static stringop_algs atom_memcpy[2] = {
1754 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1755 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1756 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1757 static stringop_algs atom_memset[2] = {
1758 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1759 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1760 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1761 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Instruction cost table for Atom tuning (costs are relative to an
   add; see the note near the top of this file).  */
1763 struct processor_costs atom_cost = {
1764 COSTS_N_INSNS (1), /* cost of an add instruction */
1765 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1766 COSTS_N_INSNS (1), /* variable shift costs */
1767 COSTS_N_INSNS (1), /* constant shift costs */
1768 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1769 COSTS_N_INSNS (4), /* HI */
1770 COSTS_N_INSNS (3), /* SI */
1771 COSTS_N_INSNS (4), /* DI */
1772 COSTS_N_INSNS (2)}, /* other */
1773 0, /* cost of multiply per each bit set */
1774 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1775 COSTS_N_INSNS (26), /* HI */
1776 COSTS_N_INSNS (42), /* SI */
1777 COSTS_N_INSNS (74), /* DI */
1778 COSTS_N_INSNS (74)}, /* other */
1779 COSTS_N_INSNS (1), /* cost of movsx */
1780 COSTS_N_INSNS (1), /* cost of movzx */
1781 8, /* "large" insn */
1782 17, /* MOVE_RATIO */
1783 4, /* cost for loading QImode using movzbl */
1784 {4, 4, 4}, /* cost of loading integer registers
1785 in QImode, HImode and SImode.
1786 Relative to reg-reg move (2). */
1787 {4, 4, 4}, /* cost of storing integer registers */
1788 4, /* cost of reg,reg fld/fst */
1789 {12, 12, 12}, /* cost of loading fp registers
1790 in SFmode, DFmode and XFmode */
1791 {6, 6, 8}, /* cost of storing fp registers
1792 in SFmode, DFmode and XFmode */
1793 2, /* cost of moving MMX register */
1794 {8, 8}, /* cost of loading MMX registers
1795 in SImode and DImode */
1796 {8, 8}, /* cost of storing MMX registers
1797 in SImode and DImode */
1798 2, /* cost of moving SSE register */
1799 {8, 8, 8}, /* cost of loading SSE registers
1800 in SImode, DImode and TImode */
1801 {8, 8, 8}, /* cost of storing SSE registers
1802 in SImode, DImode and TImode */
1803 5, /* MMX or SSE register to integer */
1804 32, /* size of l1 cache. */
1805 256, /* size of l2 cache. */
1806 64, /* size of prefetch block */
1807 6, /* number of parallel prefetches */
1808 3, /* Branch cost */
1809 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1810 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1811 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1812 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1813 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1814 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* Vectorizer cost model entries follow.  */
1817 1, /* scalar_stmt_cost. */
1818 1, /* scalar load_cost. */
1819 1, /* scalar_store_cost. */
1820 1, /* vec_stmt_cost. */
1821 1, /* vec_to_scalar_cost. */
1822 1, /* scalar_to_vec_cost. */
1823 1, /* vec_align_load_cost. */
1824 2, /* vec_unalign_load_cost. */
1825 1, /* vec_store_cost. */
1826 3, /* cond_taken_branch_cost. */
1827 1, /* cond_not_taken_branch_cost. */
/* Inline-expansion strategies for memcpy/memset for the slm
   (Silvermont -- cf. m_SILVERMONT below) tuning; same layout as the
   nocona tables above.  */
1830 static stringop_algs slm_memcpy[2] = {
1831 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1832 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1833 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1834 static stringop_algs slm_memset[2] = {
1835 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1836 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1837 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1838 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Instruction cost table for slm (Silvermont) tuning; identical to
   atom_cost except for vec_to_scalar_cost (4 here vs. 1).  */
1840 struct processor_costs slm_cost = {
1841 COSTS_N_INSNS (1), /* cost of an add instruction */
1842 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1843 COSTS_N_INSNS (1), /* variable shift costs */
1844 COSTS_N_INSNS (1), /* constant shift costs */
1845 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1846 COSTS_N_INSNS (3), /* HI */
1847 COSTS_N_INSNS (3), /* SI */
1848 COSTS_N_INSNS (4), /* DI */
1849 COSTS_N_INSNS (2)}, /* other */
1850 0, /* cost of multiply per each bit set */
1851 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1852 COSTS_N_INSNS (26), /* HI */
1853 COSTS_N_INSNS (42), /* SI */
1854 COSTS_N_INSNS (74), /* DI */
1855 COSTS_N_INSNS (74)}, /* other */
1856 COSTS_N_INSNS (1), /* cost of movsx */
1857 COSTS_N_INSNS (1), /* cost of movzx */
1858 8, /* "large" insn */
1859 17, /* MOVE_RATIO */
1860 4, /* cost for loading QImode using movzbl */
1861 {4, 4, 4}, /* cost of loading integer registers
1862 in QImode, HImode and SImode.
1863 Relative to reg-reg move (2). */
1864 {4, 4, 4}, /* cost of storing integer registers */
1865 4, /* cost of reg,reg fld/fst */
1866 {12, 12, 12}, /* cost of loading fp registers
1867 in SFmode, DFmode and XFmode */
1868 {6, 6, 8}, /* cost of storing fp registers
1869 in SFmode, DFmode and XFmode */
1870 2, /* cost of moving MMX register */
1871 {8, 8}, /* cost of loading MMX registers
1872 in SImode and DImode */
1873 {8, 8}, /* cost of storing MMX registers
1874 in SImode and DImode */
1875 2, /* cost of moving SSE register */
1876 {8, 8, 8}, /* cost of loading SSE registers
1877 in SImode, DImode and TImode */
1878 {8, 8, 8}, /* cost of storing SSE registers
1879 in SImode, DImode and TImode */
1880 5, /* MMX or SSE register to integer */
1881 32, /* size of l1 cache. */
1882 256, /* size of l2 cache. */
1883 64, /* size of prefetch block */
1884 6, /* number of parallel prefetches */
1885 3, /* Branch cost */
1886 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1887 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1888 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1889 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1890 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1891 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* Vectorizer cost model entries follow.  */
1894 1, /* scalar_stmt_cost. */
1895 1, /* scalar load_cost. */
1896 1, /* scalar_store_cost. */
1897 1, /* vec_stmt_cost. */
1898 4, /* vec_to_scalar_cost. */
1899 1, /* scalar_to_vec_cost. */
1900 1, /* vec_align_load_cost. */
1901 2, /* vec_unalign_load_cost. */
1902 1, /* vec_store_cost. */
1903 3, /* cond_taken_branch_cost. */
1904 1, /* cond_not_taken_branch_cost. */
/* Inline-expansion strategies for memcpy/memset for the generic
   "intel" tuning (cf. m_INTEL below); same layout as the nocona
   tables above.  */
1907 static stringop_algs intel_memcpy[2] = {
1908 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1909 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1910 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1911 static stringop_algs intel_memset[2] = {
1912 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1913 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1914 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1915 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Instruction cost table for the generic "intel" tuning; the values
   visible here match slm_cost.  */
1917 struct processor_costs intel_cost = {
1918 COSTS_N_INSNS (1), /* cost of an add instruction */
1919 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1920 COSTS_N_INSNS (1), /* variable shift costs */
1921 COSTS_N_INSNS (1), /* constant shift costs */
1922 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1923 COSTS_N_INSNS (3), /* HI */
1924 COSTS_N_INSNS (3), /* SI */
1925 COSTS_N_INSNS (4), /* DI */
1926 COSTS_N_INSNS (2)}, /* other */
1927 0, /* cost of multiply per each bit set */
1928 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1929 COSTS_N_INSNS (26), /* HI */
1930 COSTS_N_INSNS (42), /* SI */
1931 COSTS_N_INSNS (74), /* DI */
1932 COSTS_N_INSNS (74)}, /* other */
1933 COSTS_N_INSNS (1), /* cost of movsx */
1934 COSTS_N_INSNS (1), /* cost of movzx */
1935 8, /* "large" insn */
1936 17, /* MOVE_RATIO */
1937 4, /* cost for loading QImode using movzbl */
1938 {4, 4, 4}, /* cost of loading integer registers
1939 in QImode, HImode and SImode.
1940 Relative to reg-reg move (2). */
1941 {4, 4, 4}, /* cost of storing integer registers */
1942 4, /* cost of reg,reg fld/fst */
1943 {12, 12, 12}, /* cost of loading fp registers
1944 in SFmode, DFmode and XFmode */
1945 {6, 6, 8}, /* cost of storing fp registers
1946 in SFmode, DFmode and XFmode */
1947 2, /* cost of moving MMX register */
1948 {8, 8}, /* cost of loading MMX registers
1949 in SImode and DImode */
1950 {8, 8}, /* cost of storing MMX registers
1951 in SImode and DImode */
1952 2, /* cost of moving SSE register */
1953 {8, 8, 8}, /* cost of loading SSE registers
1954 in SImode, DImode and TImode */
1955 {8, 8, 8}, /* cost of storing SSE registers
1956 in SImode, DImode and TImode */
1957 5, /* MMX or SSE register to integer */
1958 32, /* size of l1 cache. */
1959 256, /* size of l2 cache. */
1960 64, /* size of prefetch block */
1961 6, /* number of parallel prefetches */
1962 3, /* Branch cost */
1963 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1964 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1965 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1966 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1967 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1968 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* Vectorizer cost model entries follow.  */
1971 1, /* scalar_stmt_cost. */
1972 1, /* scalar load_cost. */
1973 1, /* scalar_store_cost. */
1974 1, /* vec_stmt_cost. */
1975 4, /* vec_to_scalar_cost. */
1976 1, /* scalar_to_vec_cost. */
1977 1, /* vec_align_load_cost. */
1978 2, /* vec_unalign_load_cost. */
1979 1, /* vec_store_cost. */
1980 3, /* cond_taken_branch_cost. */
1981 1, /* cond_not_taken_branch_cost. */
1984 /* Generic should produce code tuned for Core-i7 (and newer chips)
1985 and btver1 (and newer chips). */
/* Inline-expansion strategies for memcpy/memset for the generic
   tuning (see the comment above); same layout as the nocona tables.  */
1987 static stringop_algs generic_memcpy[2] = {
1988 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1989 {-1, libcall, false}}},
1990 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1991 {-1, libcall, false}}}};
1992 static stringop_algs generic_memset[2] = {
1993 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1994 {-1, libcall, false}}},
1995 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1996 {-1, libcall, false}}}};
/* Instruction cost table for generic tuning (Core-i7-and-newer plus
   btver1-and-newer, per the comment above).  */
1998 struct processor_costs generic_cost = {
1999 COSTS_N_INSNS (1), /* cost of an add instruction */
2000 /* On all chips taken into consideration lea is 2 cycles and more. With
2001 this cost however our current implementation of synth_mult results in
2002 use of unnecessary temporary registers causing regression on several
2003 SPECfp benchmarks. */
2004 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2005 COSTS_N_INSNS (1), /* variable shift costs */
2006 COSTS_N_INSNS (1), /* constant shift costs */
2007 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2008 COSTS_N_INSNS (4), /* HI */
2009 COSTS_N_INSNS (3), /* SI */
2010 COSTS_N_INSNS (4), /* DI */
2011 COSTS_N_INSNS (2)}, /* other */
2012 0, /* cost of multiply per each bit set */
2013 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2014 COSTS_N_INSNS (26), /* HI */
2015 COSTS_N_INSNS (42), /* SI */
2016 COSTS_N_INSNS (74), /* DI */
2017 COSTS_N_INSNS (74)}, /* other */
2018 COSTS_N_INSNS (1), /* cost of movsx */
2019 COSTS_N_INSNS (1), /* cost of movzx */
2020 8, /* "large" insn */
2021 17, /* MOVE_RATIO */
2022 4, /* cost for loading QImode using movzbl */
2023 {4, 4, 4}, /* cost of loading integer registers
2024 in QImode, HImode and SImode.
2025 Relative to reg-reg move (2). */
2026 {4, 4, 4}, /* cost of storing integer registers */
2027 4, /* cost of reg,reg fld/fst */
2028 {12, 12, 12}, /* cost of loading fp registers
2029 in SFmode, DFmode and XFmode */
2030 {6, 6, 8}, /* cost of storing fp registers
2031 in SFmode, DFmode and XFmode */
2032 2, /* cost of moving MMX register */
2033 {8, 8}, /* cost of loading MMX registers
2034 in SImode and DImode */
2035 {8, 8}, /* cost of storing MMX registers
2036 in SImode and DImode */
2037 2, /* cost of moving SSE register */
2038 {8, 8, 8}, /* cost of loading SSE registers
2039 in SImode, DImode and TImode */
2040 {8, 8, 8}, /* cost of storing SSE registers
2041 in SImode, DImode and TImode */
2042 5, /* MMX or SSE register to integer */
2043 32, /* size of l1 cache. */
2044 512, /* size of l2 cache. */
2045 64, /* size of prefetch block */
2046 6, /* number of parallel prefetches */
2047 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
2048 value is increased to perhaps more appropriate value of 5. */
2049 3, /* Branch cost */
2050 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2051 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2052 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2053 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2054 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2055 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* Vectorizer cost model entries follow.  */
2058 1, /* scalar_stmt_cost. */
2059 1, /* scalar load_cost. */
2060 1, /* scalar_store_cost. */
2061 1, /* vec_stmt_cost. */
2062 1, /* vec_to_scalar_cost. */
2063 1, /* scalar_to_vec_cost. */
2064 1, /* vec_align_load_cost. */
2065 2, /* vec_unalign_load_cost. */
2066 1, /* vec_store_cost. */
2067 3, /* cond_taken_branch_cost. */
2068 1, /* cond_not_taken_branch_cost. */
2071 /* core_cost should produce code tuned for Core family of CPUs. */
/* Inline-expansion strategies for memcpy/memset for the Core tuning;
   same {size, alg, noalign} layout as the nocona tables above.  Note
   these entries use noalign = true for the rep-prefix algorithms.  */
2072 static stringop_algs core_memcpy[2] = {
2073 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
2074 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
2075 {-1, libcall, false}}}};
2076 static stringop_algs core_memset[2] = {
2077 {libcall, {{6, loop_1_byte, true},
2079 {8192, rep_prefix_4_byte, true},
2080 {-1, libcall, false}}},
2081 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
2082 {-1, libcall, false}}}};
/* Instruction cost table for the Core family tuning (see the comment
   above core_memcpy).  */
2085 struct processor_costs core_cost = {
2086 COSTS_N_INSNS (1), /* cost of an add instruction */
2087 /* On all chips taken into consideration lea is 2 cycles and more. With
2088 this cost however our current implementation of synth_mult results in
2089 use of unnecessary temporary registers causing regression on several
2090 SPECfp benchmarks. */
2091 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2092 COSTS_N_INSNS (1), /* variable shift costs */
2093 COSTS_N_INSNS (1), /* constant shift costs */
2094 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2095 COSTS_N_INSNS (4), /* HI */
2096 COSTS_N_INSNS (3), /* SI */
2097 COSTS_N_INSNS (4), /* DI */
2098 COSTS_N_INSNS (2)}, /* other */
2099 0, /* cost of multiply per each bit set */
2100 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2101 COSTS_N_INSNS (26), /* HI */
2102 COSTS_N_INSNS (42), /* SI */
2103 COSTS_N_INSNS (74), /* DI */
2104 COSTS_N_INSNS (74)}, /* other */
2105 COSTS_N_INSNS (1), /* cost of movsx */
2106 COSTS_N_INSNS (1), /* cost of movzx */
2107 8, /* "large" insn */
2108 17, /* MOVE_RATIO */
2109 4, /* cost for loading QImode using movzbl */
2110 {4, 4, 4}, /* cost of loading integer registers
2111 in QImode, HImode and SImode.
2112 Relative to reg-reg move (2). */
2113 {4, 4, 4}, /* cost of storing integer registers */
2114 4, /* cost of reg,reg fld/fst */
2115 {12, 12, 12}, /* cost of loading fp registers
2116 in SFmode, DFmode and XFmode */
2117 {6, 6, 8}, /* cost of storing fp registers
2118 in SFmode, DFmode and XFmode */
2119 2, /* cost of moving MMX register */
2120 {8, 8}, /* cost of loading MMX registers
2121 in SImode and DImode */
2122 {8, 8}, /* cost of storing MMX registers
2123 in SImode and DImode */
2124 2, /* cost of moving SSE register */
2125 {8, 8, 8}, /* cost of loading SSE registers
2126 in SImode, DImode and TImode */
2127 {8, 8, 8}, /* cost of storing SSE registers
2128 in SImode, DImode and TImode */
2129 5, /* MMX or SSE register to integer */
2130 64, /* size of l1 cache. */
2131 512, /* size of l2 cache. */
2132 64, /* size of prefetch block */
2133 6, /* number of parallel prefetches */
2134 /* FIXME perhaps more appropriate value is 5. */
2135 3, /* Branch cost */
2136 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2137 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2138 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2139 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2140 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2141 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* Vectorizer cost model entries follow.  */
2144 1, /* scalar_stmt_cost. */
2145 1, /* scalar load_cost. */
2146 1, /* scalar_store_cost. */
2147 1, /* vec_stmt_cost. */
2148 1, /* vec_to_scalar_cost. */
2149 1, /* scalar_to_vec_cost. */
2150 1, /* vec_align_load_cost. */
2151 2, /* vec_unalign_load_cost. */
2152 1, /* vec_store_cost. */
2153 3, /* cond_taken_branch_cost. */
2154 1, /* cond_not_taken_branch_cost. */
/* Active cost tables.  Both default to pentium_cost here and are
   presumably repointed during option processing -- confirm against
   the option-override code.  */
2158 /* Set by -mtune. */
2159 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2161 /* Set by -mtune or -Os. */
2162 const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  One bit per
   PROCESSOR_* enumerator; the m_* masks below are OR-combined in the
   x86-tune.def selectors.  */
2165 #define m_386 (1<<PROCESSOR_I386)
2166 #define m_486 (1<<PROCESSOR_I486)
2167 #define m_PENT (1<<PROCESSOR_PENTIUM)
2168 #define m_LAKEMONT (1<<PROCESSOR_LAKEMONT)
2169 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2170 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2171 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2172 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2173 #define m_CORE2 (1<<PROCESSOR_CORE2)
2174 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2175 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2176 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2177 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2178 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2179 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2180 #define m_KNL (1<<PROCESSOR_KNL)
2181 #define m_SKYLAKE_AVX512 (1<<PROCESSOR_SKYLAKE_AVX512)
2182 #define m_INTEL (1<<PROCESSOR_INTEL)
/* AMD processors (cf. m_AMD_MULTIPLE below).  */
2184 #define m_GEODE (1<<PROCESSOR_GEODE)
2185 #define m_K6 (1<<PROCESSOR_K6)
2186 #define m_K6_GEODE (m_K6 | m_GEODE)
2187 #define m_K8 (1<<PROCESSOR_K8)
2188 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2189 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2190 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2191 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2192 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2193 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2194 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2195 #define m_ZNVER1 (1<<PROCESSOR_ZNVER1)
2196 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2197 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2198 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2199 #define m_BTVER (m_BTVER1 | m_BTVER2)
2200 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \
2203 #define m_GENERIC (1<<PROCESSOR_GENERIC)
/* Human-readable names of the tuning flags, in x86-tune.def order;
   each DEF_TUNE entry contributes its NAME string.  */
2205 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2207 #define DEF_TUNE(tune, name, selector) name,
2208 #include "x86-tune.def"
2212 /* Feature tests against the various tunings. */
2213 unsigned char ix86_tune_features[X86_TUNE_LAST];
2215 /* Feature tests against the various tunings used to create ix86_tune_features
2216 based on the processor mask. */
2217 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2219 #define DEF_TUNE(tune, name, selector) selector,
2220 #include "x86-tune.def"
2224 /* Feature tests against the various architecture variations. */
2225 unsigned char ix86_arch_features[X86_ARCH_LAST];
2227 /* Feature tests against the various architecture variations, used to create
2228 ix86_arch_features based on the processor mask. */
2229 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2230 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2231 ~(m_386 | m_486 | m_PENT | m_LAKEMONT | m_K6),
2233 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2236 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2239 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2242 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2246 /* In case the average insn count for single function invocation is
2247 lower than this constant, emit fast (but longer) prologue and
2249 #define FAST_PROLOGUE_INSN_COUNT 20
2251 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
/* The *_REGISTER_NAMES macros are presumably provided by the target
   header (i386.h) -- confirm.  */
2252 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2253 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2254 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2256 /* Array of the smallest class containing reg number REGNO, indexed by
2257 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2259 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2261 /* ax, dx, cx, bx */
2262 AREG, DREG, CREG, BREG,
2263 /* si, di, bp, sp */
2264 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
/* FP stack registers.  */
2266 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2267 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2270 /* flags, fpsr, fpcr, frame */
2271 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
/* SSE registers.  */
2273 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
/* MMX registers.  */
2276 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
/* Extended (REX) integer registers -- presumably r8-r15; confirm
   against the register ordering in i386.h.  */
2279 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2280 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2281 /* SSE REX registers */
2282 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2284 /* AVX-512 SSE registers */
2285 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2286 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2287 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2288 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2289 /* Mask registers. */
2290 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2291 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2292 /* MPX bound registers */
2293 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2296 /* The "default" register map used in 32bit mode. */
2298 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2300 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2301 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2302 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2303 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2304 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2305 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2306 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2307 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2308 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2309 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2310 101, 102, 103, 104, /* bound registers */
2313 /* The "default" register map used in 64bit mode. */
2315 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2317 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2318 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2319 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2320 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2321 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2322 8,9,10,11,12,13,14,15, /* extended integer registers */
2323 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2324 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2325 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2326 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2327 126, 127, 128, 129, /* bound registers */
2330 /* Define the register numbers to be used in Dwarf debugging information.
2331 The SVR4 reference port C compiler uses the following register numbers
2332 in its Dwarf output code:
2333 0 for %eax (gcc regno = 0)
2334 1 for %ecx (gcc regno = 2)
2335 2 for %edx (gcc regno = 1)
2336 3 for %ebx (gcc regno = 3)
2337 4 for %esp (gcc regno = 7)
2338 5 for %ebp (gcc regno = 6)
2339 6 for %esi (gcc regno = 4)
2340 7 for %edi (gcc regno = 5)
2341 The following three DWARF register numbers are never generated by
2342 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2343 believes these numbers have these meanings.
2344 8 for %eip (no gcc equivalent)
2345 9 for %eflags (gcc regno = 17)
2346 10 for %trapno (no gcc equivalent)
2347 It is not at all clear how we should number the FP stack registers
2348 for the x86 architecture. If the version of SDB on x86/svr4 were
2349 a bit less brain dead with respect to floating-point then we would
2350 have a precedent to follow with respect to DWARF register numbers
2351 for x86 FP registers, but the SDB on x86/svr4 is so completely
2352 broken with respect to FP registers that it is hardly worth thinking
2353 of it as something to strive for compatibility with.
2354 The version of x86/svr4 SDB I have at the moment does (partially)
2355 seem to believe that DWARF register number 11 is associated with
2356 the x86 register %st(0), but that's about all. Higher DWARF
2357 register numbers don't seem to be associated with anything in
2358 particular, and even for DWARF regno 11, SDB only seems to under-
2359 stand that it should say that a variable lives in %st(0) (when
2360 asked via an `=' command) if we said it was in DWARF regno 11,
2361 but SDB still prints garbage when asked for the value of the
2362 variable in question (via a `/' command).
2363 (Also note that the labels SDB prints for various FP stack regs
2364 when doing an `x' command are all wrong.)
2365 Note that these problems generally don't affect the native SVR4
2366 C compiler because it doesn't allow the use of -O with -g and
2367 because when it is *not* optimizing, it allocates a memory
2368 location for each floating-point variable, and the memory
2369 location is what gets described in the DWARF AT_location
2370 attribute for the variable in question.
2371 Regardless of the severe mental illness of the x86/svr4 SDB, we
2372 do something sensible here and we use the following DWARF
2373 register numbers. Note that these are all stack-top-relative
2375 11 for %st(0) (gcc regno = 8)
2376 12 for %st(1) (gcc regno = 9)
2377 13 for %st(2) (gcc regno = 10)
2378 14 for %st(3) (gcc regno = 11)
2379 15 for %st(4) (gcc regno = 12)
2380 16 for %st(5) (gcc regno = 13)
2381 17 for %st(6) (gcc regno = 14)
2382 18 for %st(7) (gcc regno = 15)
/* SVR4 DWARF register numbering; see the long discussion in the
   comment immediately above.  */
2384 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2386 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2387 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2388 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2389 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2390 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2391 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2392 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2393 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2394 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2395 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2396 101, 102, 103, 104, /* bound registers */
2399 /* Define parameter passing and return registers. */
/* Integer argument registers in order of use -- the DI,SI,DX,CX,R8,R9
   sequence matches the System V AMD64 ABI.  */
2401 static int const x86_64_int_parameter_registers[6] =
2403 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
/* Integer argument registers for the Microsoft x64 calling
   convention.  */
2406 static int const x86_64_ms_abi_int_parameter_registers[4] =
2408 CX_REG, DX_REG, R8_REG, R9_REG
2411 static int const x86_64_int_return_registers[4] =
2413 AX_REG, DX_REG, DI_REG, SI_REG
2416 /* Additional registers that are clobbered by SYSV calls. */
/* I.e. registers a SysV callee may clobber that the MS ABI treats as
   call-saved -- presumably; confirm against the MS/SysV stub code.  */
2418 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2422 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2423 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2426 /* Define the structure for the machine field in struct function. */
/* Node in a linked list of stack slots; GTY(()) marks it for the
   garbage-collected precompiled-header machinery.  */
2428 struct GTY(()) stack_local_entry {
2429 unsigned short mode;
2432 struct stack_local_entry *next;
2435 /* Structure describing stack frame layout.
2436 Stack grows downward:
2442 saved static chain if ix86_static_chain_on_stack
2444 saved frame pointer if frame_pointer_needed
2445 <- HARD_FRAME_POINTER
2451 <- sse_regs_save_offset
2454 [va_arg registers] |
2458 [padding2] | = to_allocate
2467 int outgoing_arguments_size;
2469 /* The offsets relative to ARG_POINTER. */
2470 HOST_WIDE_INT frame_pointer_offset;
2471 HOST_WIDE_INT hard_frame_pointer_offset;
2472 HOST_WIDE_INT stack_pointer_offset;
2473 HOST_WIDE_INT hfp_save_offset;
2474 HOST_WIDE_INT reg_save_offset;
2475 HOST_WIDE_INT sse_reg_save_offset;
2477 /* When save_regs_using_mov is set, emit prologue using
2478 move instead of push instructions. */
2479 bool save_regs_using_mov;
2482 /* Which cpu are we scheduling for. */
2483 enum attr_cpu ix86_schedule;
2485 /* Which cpu are we optimizing for. */
2486 enum processor_type ix86_tune;
2488 /* Which instruction set architecture to use. */
2489 enum processor_type ix86_arch;
2491 /* True if processor has SSE prefetch instruction. */
2492 unsigned char x86_prefetch_sse;
2494 /* -mstackrealign option */
2495 static const char ix86_force_align_arg_pointer_string[]
2496 = "force_align_arg_pointer";
/* Insn generator callbacks -- presumably selected according to the
   target word size during option processing; confirm against the
   option-override code.  */
2498 static rtx (*ix86_gen_leave) (void);
2499 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2500 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2501 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2502 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2503 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2504 static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
2505 static rtx (*ix86_gen_clzero) (rtx);
2506 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2507 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2508 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2509 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2510 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2511 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2513 /* Preferred alignment for stack boundary in bits. */
2514 unsigned int ix86_preferred_stack_boundary;
2516 /* Alignment for incoming stack boundary in bits specified at
2518 static unsigned int ix86_user_incoming_stack_boundary;
2520 /* Default alignment for incoming stack boundary in bits. */
2521 static unsigned int ix86_default_incoming_stack_boundary;
2523 /* Alignment for incoming stack boundary in bits. */
2524 unsigned int ix86_incoming_stack_boundary;
2526 /* Calling abi specific va_list type nodes. */
2527 static GTY(()) tree sysv_va_list_type_node;
2528 static GTY(()) tree ms_va_list_type_node;
2530 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2531 char internal_label_prefix[16];
2532 int internal_label_prefix_len;
2534 /* Fence to use after loop using movnt. */
2537 /* Register class used for passing given 64bit part of the argument.
2538 These represent classes as documented by the PS ABI, with the exception
2539 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2540 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2542 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2543 whenever possible (upper half does contain padding). */
2544 enum x86_64_reg_class
2547 X86_64_INTEGER_CLASS,
2548 X86_64_INTEGERSI_CLASS,
2555 X86_64_COMPLEX_X87_CLASS,
2559 #define MAX_CLASSES 8
2561 /* Table of constants used by fldpi, fldln2, etc.... */
2562 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2563 static bool ext_80387_constants_init = 0;
/* Forward declarations for static helpers defined later in the file.  */
2566 static struct machine_function * ix86_init_machine_status (void);
2567 static rtx ix86_function_value (const_tree, const_tree, bool);
2568 static bool ix86_function_value_regno_p (const unsigned int);
2569 static unsigned int ix86_function_arg_boundary (machine_mode,
2571 static rtx ix86_static_chain (const_tree, bool);
2572 static int ix86_function_regparm (const_tree, const_tree);
2573 static void ix86_compute_frame_layout (struct ix86_frame *);
2574 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2576 static void ix86_add_new_builtins (HOST_WIDE_INT);
2577 static tree ix86_canonical_va_list_type (tree);
2578 static void predict_jump (int);
2579 static unsigned int split_stack_prologue_scratch_regno (void);
2580 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
/* Indices of the string slots saved in struct cl_target_option.  */
2582 enum ix86_function_specific_strings
2584 IX86_FUNCTION_SPECIFIC_ARCH,
2585 IX86_FUNCTION_SPECIFIC_TUNE,
2586 IX86_FUNCTION_SPECIFIC_MAX
2589 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2590 const char *, enum fpmath_unit, bool);
2591 static void ix86_function_specific_save (struct cl_target_option *,
2592 struct gcc_options *opts);
2593 static void ix86_function_specific_restore (struct gcc_options *opts,
2594 struct cl_target_option *);
2595 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
2596 static void ix86_function_specific_print (FILE *, int,
2597 struct cl_target_option *);
2598 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2599 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2600 struct gcc_options *,
2601 struct gcc_options *,
2602 struct gcc_options *);
2603 static bool ix86_can_inline_p (tree, tree);
2604 static void ix86_set_current_function (tree);
2605 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2607 static enum calling_abi ix86_function_abi (const_tree);
2610 #ifndef SUBTARGET32_DEFAULT_CPU
2611 #define SUBTARGET32_DEFAULT_CPU "i386"
2614 /* Whether -mtune= or -march= were specified */
2615 static int ix86_tune_defaulted;
2616 static int ix86_arch_specified;
2618 /* Vectorization library interface and handlers. */
2619 static tree (*ix86_veclib_handler) (combined_fn, tree, tree);
2621 static tree ix86_veclibabi_svml (combined_fn, tree, tree);
2622 static tree ix86_veclibabi_acml (combined_fn, tree, tree);
2624 /* Processor target table, indexed by processor number */
/* Struct ptt: per-processor cost table pointer plus default code
   alignments (loops, jumps, functions) used when the user does not
   specify -falign-* values.  */
2627 const char *const name; /* processor name */
2628 const struct processor_costs *cost; /* Processor costs */
2629 const int align_loop; /* Default alignments. */
2630 const int align_loop_max_skip;
2631 const int align_jump;
2632 const int align_jump_max_skip;
2633 const int align_func;
2636 /* This table must be in sync with enum processor_type in i386.h. */
2637 static const struct ptt processor_target_table[PROCESSOR_max] =
2639 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2640 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2641 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2642 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2643 {"lakemont", &lakemont_cost, 16, 7, 16, 7, 16},
2644 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2645 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2646 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2647 {"core2", &core_cost, 16, 10, 16, 10, 16},
2648 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2649 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2650 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2651 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2652 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2653 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2654 {"skylake-avx512", &core_cost, 16, 10, 16, 10, 16},
2655 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2656 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2657 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2658 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2659 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2660 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2661 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2662 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2663 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2664 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2665 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2666 {"btver2", &btver2_cost, 16, 10, 16, 7, 11},
2667 {"znver1", &znver1_cost, 16, 10, 16, 7, 11}
/* Worker for the vzeroupper insertion pass: disable every mode-switching
   entity except AVX_U128, then re-run the mode-switching pass so only
   vzeroupper placement is recomputed.  (Listing elided: the return-type
   line and final return are not visible here.)  */
2671 rest_of_handle_insert_vzeroupper (void)
2675 /* vzeroupper instructions are inserted immediately after reload to
2676 account for possible spills from 256bit registers. The pass
2677 reuses mode switching infrastructure by re-running mode insertion
2678 pass, so disable entities that have already been processed. */
2679 for (i = 0; i < MAX_386_ENTITIES; i++)
2680 ix86_optimize_mode_switching[i] = 0;
/* Only the AVX upper-128 state machine remains active.  */
2682 ix86_optimize_mode_switching[AVX_U128] = 1;
2684 /* Call optimize_mode_switching. */
2685 g->get_passes ()->execute_pass_mode_switching ();
2689 /* Return 1 if INSN uses or defines a hard register.
2690 Hard register uses in a memory address are ignored.
2691 Clobbers and flags definitions are ignored. */
2694 has_non_address_hard_reg (rtx_insn *insn)
/* Scan the insn's defs: a hard-register def counts unless it is a
   must-clobber or the flags register.  (The return statements are
   elided from this listing.)  */
2697 FOR_EACH_INSN_DEF (ref, insn)
2698 if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
2699 && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
2700 && DF_REF_REGNO (ref) != FLAGS_REG)
/* Scan the uses: only non-memory uses of hard registers count.  */
2703 FOR_EACH_INSN_USE (ref, insn)
2704 if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
2710 /* Check if comparison INSN may be transformed
2711 into vector comparison. Currently we transform
2712 zero checks only which look like:
2714 (set (reg:CCZ 17 flags)
2715 (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
2716 (subreg:SI (reg:DI x) 0))
2717 (const_int 0 [0]))) */
2720 convertible_comparison_p (rtx_insn *insn)
2725 rtx def_set = single_set (insn);
2727 gcc_assert (def_set);
2729 rtx src = SET_SRC (def_set);
2730 rtx dst = SET_DEST (def_set);
2732 gcc_assert (GET_CODE (src) == COMPARE);
/* Destination must be the CCZ flags register, i.e. a zero test.  */
2734 if (GET_CODE (dst) != REG
2735 || REGNO (dst) != FLAGS_REG
2736 || GET_MODE (dst) != CCZmode)
2739 rtx op1 = XEXP (src, 0);
2740 rtx op2 = XEXP (src, 1);
/* Comparison must be against zero.  */
2742 if (op2 != CONST0_RTX (GET_MODE (op2)))
2745 if (GET_CODE (op1) != IOR)
2748 op2 = XEXP (op1, 1);
2749 op1 = XEXP (op1, 0);
/* Both IOR operands must be SImode subregs of the same DImode reg,
   one selecting the low half and the other the high half (in either
   order).  */
2753 || GET_MODE (op1) != SImode
2754 || GET_MODE (op2) != SImode
2755 || ((SUBREG_BYTE (op1) != 0
2756 || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
2757 && (SUBREG_BYTE (op2) != 0
2758 || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
2761 op1 = SUBREG_REG (op1);
2762 op2 = SUBREG_REG (op2);
/* The underlying register must be DImode (check partly elided).  */
2766 || GET_MODE (op1) != DImode)
2772 /* Return 1 if INSN may be converted into vector
2776 scalar_to_vector_candidate_p (rtx_insn *insn)
2778 rtx def_set = single_set (insn);
/* Insns touching hard registers (outside addresses) can't be
   converted; see has_non_address_hard_reg.  */
2783 if (has_non_address_hard_reg (insn))
2786 rtx src = SET_SRC (def_set);
2787 rtx dst = SET_DEST (def_set);
/* Zero-compares are handled by a dedicated predicate.  */
2789 if (GET_CODE (src) == COMPARE)
2790 return convertible_comparison_p (insn);
2792 /* We are interested in DImode promotion only. */
2793 if (GET_MODE (src) != DImode
2794 || GET_MODE (dst) != DImode)
2797 if (!REG_P (dst) && !MEM_P (dst))
/* Dispatch on the source code; the case labels are elided from this
   listing, but the operand checks below apply to the binary-op cases.  */
2800 switch (GET_CODE (src))
2819 if (!REG_P (XEXP (src, 0)) && !MEM_P (XEXP (src, 0))
2820 /* Check for andnot case. */
2821 && (GET_CODE (src) != AND
2822 || GET_CODE (XEXP (src, 0)) != NOT
2823 || !REG_P (XEXP (XEXP (src, 0), 0))))
2826 if (!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
/* Both operands must themselves be DImode.  */
2829 if (GET_MODE (XEXP (src, 0)) != DImode
2830 || GET_MODE (XEXP (src, 1)) != DImode)
2836 /* For a given bitmap of insn UIDs scans all instruction and
2837 remove insn from CANDIDATES in case it has both convertible
2838 and not convertible definitions.
2840 All insns in a bitmap are conversion candidates according to
2841 scalar_to_vector_candidate_p. Currently it implies all insns
2845 remove_non_convertible_regs (bitmap candidates)
/* REGS collects pseudos that have at least one definition outside
   CANDIDATES; any candidate insn defining such a pseudo is dropped.  */
2849 bitmap regs = BITMAP_ALLOC (NULL);
2851 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
2853 rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
2854 rtx reg = SET_DEST (def_set);
/* Skip non-regs, already-flagged regs, and hard regs (guard
   partially elided in this listing).  */
2857 || bitmap_bit_p (regs, REGNO (reg))
2858 || HARD_REGISTER_P (reg))
2861 for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg));
2863 def = DF_REF_NEXT_REG (def))
2865 if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
2869 "r%d has non convertible definition in insn %d\n",
2870 REGNO (reg), DF_REF_INSN_UID (def));
2872 bitmap_set_bit (regs, REGNO (reg));
/* Second walk: remove every candidate insn that defines a flagged
   register.  */
2878 EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
2880 for (df_ref def = DF_REG_DEF_CHAIN (id);
2882 def = DF_REF_NEXT_REG (def))
2883 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
2886 fprintf (dump_file, "Removing insn %d from candidates list\n",
2887 DF_REF_INSN_UID (def));
2889 bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
/* scalar_chain: a connected set of DImode insns converted to V2DImode
   as a unit.  (The class head and several member declarations are
   elided from this listing.)  */
/* Monotonic counter used to number chains for dump output.  */
2902 static unsigned max_id;
2904 /* ID of a chain. */
2905 unsigned int chain_id;
2906 /* A queue of instructions to be included into a chain. */
2908 /* Instructions included into a chain. */
2910 /* All registers defined by a chain. */
2912 /* Registers used in both vector and sclar modes. */
2915 void build (bitmap candidates, unsigned insn_uid);
2916 int compute_convert_gain ();
2920 void add_insn (bitmap candidates, unsigned insn_uid);
2921 void add_to_queue (unsigned insn_uid);
2922 void mark_dual_mode_def (df_ref def);
2923 void analyze_register_chain (bitmap candidates, df_ref ref);
2924 rtx replace_with_subreg (rtx x, rtx reg, rtx subreg);
2925 void emit_conversion_insns (rtx insns, rtx_insn *pos);
2926 void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg);
2927 void convert_insn (rtx_insn *insn);
2928 void convert_op (rtx *op, rtx_insn *insn);
2929 void convert_reg (unsigned regno);
2930 void make_vector_copies (unsigned regno);
2933 unsigned scalar_chain::max_id = 0;
2935 /* Initialize new chain. */
2937 scalar_chain::scalar_chain ()
/* Assign the next sequential chain id.  */
2939 chain_id = ++max_id;
2942 fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);
/* Allocate the per-chain bitmaps (insns, defs, dual-mode defs).  */
2944 bitmap_obstack_initialize (NULL);
2945 insns = BITMAP_ALLOC (NULL);
2946 defs = BITMAP_ALLOC (NULL);
2947 defs_conv = BITMAP_ALLOC (NULL);
2951 /* Free chain's data. */
2953 scalar_chain::~scalar_chain ()
2955 BITMAP_FREE (insns);
2957 BITMAP_FREE (defs_conv);
2958 bitmap_obstack_release (NULL);
2961 /* Add instruction into chains' queue. */
2964 scalar_chain::add_to_queue (unsigned insn_uid)
/* Skip insns already in the chain or already queued.  */
2966 if (bitmap_bit_p (insns, insn_uid)
2967 || bitmap_bit_p (queue, insn_uid))
2971 fprintf (dump_file, " Adding insn %d into chain's #%d queue\n",
2972 insn_uid, chain_id);
2973 bitmap_set_bit (queue, insn_uid);
2976 /* Mark register defined by DEF as requiring conversion. */
2979 scalar_chain::mark_dual_mode_def (df_ref def)
2981 gcc_assert (DF_REF_REG_DEF_P (def));
/* Already marked — nothing to do (early return elided in listing).  */
2983 if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def)))
2988 " Mark r%d def in insn %d as requiring both modes in chain #%d\n",
2989 DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
2991 bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
2994 /* Check REF's chain to add new insns into a queue
2995 and find registers requiring conversion. */
2998 scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
/* REF's insn must already be in the chain or a candidate.  */
3002 gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
3003 || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
3004 add_to_queue (DF_REF_INSN_UID (ref));
/* Walk the DU/UD chain of REF looking at every related ref.  */
3006 for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
3008 unsigned uid = DF_REF_INSN_UID (chain->ref);
3010 if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
3013 if (!DF_REF_REG_MEM_P (chain->ref))
3015 if (bitmap_bit_p (insns, uid))
/* Candidate insns get queued (queueing code elided in listing).  */
3018 if (bitmap_bit_p (candidates, uid))
/* Non-convertible refs force the register into dual-mode: a def is
   marked directly, a use marks the def REF came from.  */
3025 if (DF_REF_REG_DEF_P (chain->ref))
3028 fprintf (dump_file, " r%d def in insn %d isn't convertible\n",
3029 DF_REF_REGNO (chain->ref), uid);
3030 mark_dual_mode_def (chain->ref);
3035 fprintf (dump_file, " r%d use in insn %d isn't convertible\n",
3036 DF_REF_REGNO (chain->ref), uid);
3037 mark_dual_mode_def (ref);
3042 /* Add instruction into a chain. */
3045 scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
3047 if (bitmap_bit_p (insns, insn_uid))
3051 fprintf (dump_file, " Adding insn %d to chain #%d\n", insn_uid, chain_id);
3053 bitmap_set_bit (insns, insn_uid);
/* Record the defined pseudo (hard regs are never recorded).  */
3055 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
3056 rtx def_set = single_set (insn);
3057 if (def_set && REG_P (SET_DEST (def_set))
3058 && !HARD_REGISTER_P (SET_DEST (def_set)))
3059 bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));
/* Pull in everything reachable through the defs of this insn...  */
3063 for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
3064 if (!HARD_REGISTER_P (DF_REF_REG (ref)))
3065 for (def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref));
3067 def = DF_REF_NEXT_REG (def))
3068 analyze_register_chain (candidates, def);
/* ...and through its non-memory uses.  */
3069 for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
3070 if (!DF_REF_REG_MEM_P (ref))
3071 analyze_register_chain (candidates, ref);
3074 /* Build new chain starting from insn INSN_UID recursively
3075 adding all dependent uses and definitions. */
3078 scalar_chain::build (bitmap candidates, unsigned insn_uid)
3080 queue = BITMAP_ALLOC (NULL);
3081 bitmap_set_bit (queue, insn_uid);
3084 fprintf (dump_file, "Building chain #%d...\n", chain_id);
/* Worklist loop: each queued insn is removed from CANDIDATES and
   added to the chain, which may queue further insns.  */
3086 while (!bitmap_empty_p (queue))
3088 insn_uid = bitmap_first_set_bit (queue);
3089 bitmap_clear_bit (queue, insn_uid);
3090 bitmap_clear_bit (candidates, insn_uid);
3091 add_insn (candidates, insn_uid);
/* Dump the collected chain and its dual-mode defs.  */
3096 fprintf (dump_file, "Collected chain #%d...\n", chain_id);
3097 fprintf (dump_file, " insns: ");
3098 dump_bitmap (dump_file, insns);
3099 if (!bitmap_empty_p (defs_conv))
3103 const char *comma = "";
3104 fprintf (dump_file, " defs to convert: ");
3105 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
3107 fprintf (dump_file, "%sr%d", comma, id);
3110 fprintf (dump_file, "\n");
3114 BITMAP_FREE (queue);
3117 /* Compute a gain for chain conversion. */
3120 scalar_chain::compute_convert_gain ()
3128 fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
/* Per-insn gain: scalar cost of the DImode operation minus the cost
   of its vector replacement, using the active ix86_cost tables.  */
3130 EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
3132 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
3133 rtx def_set = single_set (insn);
3134 rtx src = SET_SRC (def_set);
3135 rtx dst = SET_DEST (def_set);
3137 if (REG_P (src) && REG_P (dst))
3138 gain += COSTS_N_INSNS (2) - ix86_cost->sse_move;
3139 else if (REG_P (src) && MEM_P (dst))
3140 gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
3141 else if (MEM_P (src) && REG_P (dst))
3142 gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1];
3143 else if (GET_CODE (src) == PLUS
3144 || GET_CODE (src) == MINUS
3145 || GET_CODE (src) == IOR
3146 || GET_CODE (src) == XOR
3147 || GET_CODE (src) == AND)
3148 gain += ix86_cost->add;
3149 else if (GET_CODE (src) == COMPARE)
3151 /* Assume comparison cost is the same. */
3158 fprintf (dump_file, " Instruction conversion gain: %d\n", gain);
/* Subtract the cost of scalar<->vector copies for dual-mode regs,
   one per definition.  */
3160 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi)
3161 cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->mmxsse_to_integer;
3164 fprintf (dump_file, " Registers conversion cost: %d\n", cost);
3169 fprintf (dump_file, " Total gain: %d\n", gain);
3174 /* Replace REG in X with a V2DI subreg of NEW_REG. */
3177 scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg)
/* Base case: X is REG itself (equality test elided in listing).  */
3180 return gen_rtx_SUBREG (V2DImode, new_reg, 0);
/* Recurse over the rtx format string, rewriting 'e' operands and
   'E' vectors in place.  */
3182 const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
3184 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3187 XEXP (x, i) = replace_with_subreg (XEXP (x, i), reg, new_reg);
3188 else if (fmt[i] == 'E')
3189 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3190 XVECEXP (x, i, j) = replace_with_subreg (XVECEXP (x, i, j),
3197 /* Replace REG in INSN with a V2DI subreg of NEW_REG. */
3200 scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx new_reg)
3202 replace_with_subreg (single_set (insn), reg, new_reg);
3205 /* Insert generated conversion instruction sequence INSNS
3206 after instruction AFTER. New BB may be required in case
3207 instruction has EH region attached. */
3210 scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
3212 if (!control_flow_insn_p (after))
3214 emit_insn_after (insns, after);
/* AFTER ends its block: emit on the fallthru edge, splitting it so
   the conversion code gets its own basic block.  */
3218 basic_block bb = BLOCK_FOR_INSN (after);
3219 edge e = find_fallthru_edge (bb->succs);
3222 basic_block new_bb = split_edge (e);
3223 emit_insn_after (insns, BB_HEAD (new_bb));
3226 /* Make vector copies for all register REGNO definitions
3227 and replace its uses in a chain. */
3230 scalar_chain::make_vector_copies (unsigned regno)
3232 rtx reg = regno_reg_rtx[regno];
3233 rtx vreg = gen_reg_rtx (DImode);
/* After each definition of REGNO outside the chain, materialize its
   value into VREG as a vector.  */
3236 for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3237 if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
3239 rtx_insn *insn = DF_REF_INSN (ref);
/* Strategy 1 (condition elided — presumably SSE4.1; confirm against
   full source): load low half, pinsrd the high half.  */
3244 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
3245 CONST0_RTX (V4SImode),
3246 gen_rtx_SUBREG (SImode, reg, 0)));
3247 emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
3248 gen_rtx_SUBREG (V4SImode, vreg, 0),
3249 gen_rtx_SUBREG (SImode, reg, 4),
/* Strategy 2: two SSE2 loads plus an interleave when direct
   integer->vector moves are enabled.  */
3252 else if (TARGET_INTER_UNIT_MOVES_TO_VEC)
3254 rtx tmp = gen_reg_rtx (DImode);
3255 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
3256 CONST0_RTX (V4SImode),
3257 gen_rtx_SUBREG (SImode, reg, 0)));
3258 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
3259 CONST0_RTX (V4SImode),
3260 gen_rtx_SUBREG (SImode, reg, 4)));
3261 emit_insn (gen_vec_interleave_lowv4si
3262 (gen_rtx_SUBREG (V4SImode, vreg, 0),
3263 gen_rtx_SUBREG (V4SImode, vreg, 0),
3264 gen_rtx_SUBREG (V4SImode, tmp, 0)));
/* Strategy 3 (fallback): round-trip through a stack slot.  */
3268 rtx tmp = assign_386_stack_local (DImode, SLOT_TEMP);
3269 emit_move_insn (adjust_address (tmp, SImode, 0),
3270 gen_rtx_SUBREG (SImode, reg, 0));
3271 emit_move_insn (adjust_address (tmp, SImode, 4),
3272 gen_rtx_SUBREG (SImode, reg, 4));
3273 emit_move_insn (vreg, tmp);
3275 rtx_insn *seq = get_insns ();
3277 emit_conversion_insns (seq, insn);
3281 " Copied r%d to a vector register r%d for insn %d\n",
3282 regno, REGNO (vreg), DF_REF_INSN_UID (ref));
/* Rewrite all in-chain uses of REG to the vector copy.  */
3285 for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3286 if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
3288 replace_with_subreg_in_insn (DF_REF_INSN (ref), reg, vreg);
3291 fprintf (dump_file, " Replaced r%d with r%d in insn %d\n",
3292 regno, REGNO (vreg), DF_REF_INSN_UID (ref));
3296 /* Convert all definitions of register REGNO
3297 and fix its uses. Scalar copies may be created
3298 in case register is used in not convertible insn. */
3301 scalar_chain::convert_reg (unsigned regno)
/* SCALAR_COPY: REGNO is also needed in scalar mode somewhere.  */
3303 bool scalar_copy = bitmap_bit_p (defs_conv, regno);
3304 rtx reg = regno_reg_rtx[regno];
3305 rtx scopy = NULL_RTX;
/* CONV tracks chain insns whose use of REG is not yet rewritten.  */
3309 conv = BITMAP_ALLOC (NULL);
3310 bitmap_copy (conv, insns);
3313 scopy = gen_reg_rtx (DImode);
3315 for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3317 rtx_insn *insn = DF_REF_INSN (ref);
3318 rtx def_set = single_set (insn);
3319 rtx src = SET_SRC (def_set);
3320 rtx reg = DF_REF_REG (ref);
/* Rewrite the defining insn itself to the vector form.  */
3324 replace_with_subreg_in_insn (insn, reg, reg);
3325 bitmap_clear_bit (conv, INSN_UID (insn));
/* When a scalar copy is required, extract both 32-bit halves of the
   vector value back into SCOPY after the definition.  */
3330 rtx vcopy = gen_reg_rtx (V2DImode);
3333 if (TARGET_INTER_UNIT_MOVES_FROM_VEC)
3335 emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0));
3336 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
3337 gen_rtx_SUBREG (SImode, vcopy, 0));
3338 emit_move_insn (vcopy,
3339 gen_rtx_LSHIFTRT (V2DImode, vcopy, GEN_INT (32)));
3340 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
3341 gen_rtx_SUBREG (SImode, vcopy, 0));
/* Fallback: spill through a stack slot when direct vector->integer
   moves are not available.  */
3345 rtx tmp = assign_386_stack_local (DImode, SLOT_TEMP);
3346 emit_move_insn (tmp, reg);
3347 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
3348 adjust_address (tmp, SImode, 0));
3349 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
3350 adjust_address (tmp, SImode, 4));
3352 rtx_insn *seq = get_insns ();
3354 emit_conversion_insns (seq, insn);
3358 " Copied r%d to a scalar register r%d for insn %d\n",
3359 regno, REGNO (scopy), INSN_UID (insn));
/* Fix remaining uses: in-chain uses become vector subregs; other
   non-debug, initialized uses are redirected to the scalar copy.  */
3363 for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3364 if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
3366 if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref)))
3368 rtx def_set = single_set (DF_REF_INSN (ref));
3369 if (!MEM_P (SET_DEST (def_set))
3370 || !REG_P (SET_SRC (def_set)))
3371 replace_with_subreg_in_insn (DF_REF_INSN (ref), reg, reg);
3372 bitmap_clear_bit (conv, DF_REF_INSN_UID (ref));
3375 /* Skip debug insns and uninitialized uses. */
3376 else if (DF_REF_CHAIN (ref)
3377 && NONDEBUG_INSN_P (DF_REF_INSN (ref)))
3380 replace_rtx (DF_REF_INSN (ref), reg, scopy);
3381 df_insn_rescan (DF_REF_INSN (ref));
3387 /* Convert operand OP in INSN. All register uses
3388 are converted during registers conversion.
3389 Therefore we should just handle memory operands. */
3392 scalar_chain::convert_op (rtx *op, rtx_insn *insn)
3394 *op = copy_rtx_if_shared (*op);
/* NOT wraps a single operand: convert it, then retag the NOT.  */
3396 if (GET_CODE (*op) == NOT)
3398 convert_op (&XEXP (*op, 0), insn);
3399 PUT_MODE (*op, V2DImode);
/* Memory operand: preload into a fresh pseudo so the vector insn
   sees a register.  */
3401 else if (MEM_P (*op))
3403 rtx tmp = gen_reg_rtx (DImode);
3405 emit_insn_before (gen_move_insn (tmp, *op), insn);
3406 *op = gen_rtx_SUBREG (V2DImode, tmp, 0);
3409 fprintf (dump_file, " Preloading operand for insn %d into r%d\n",
3410 INSN_UID (insn), REGNO (tmp));
3412 else if (REG_P (*op))
3414 /* We may have not converted register usage in case
3415 this register has no definition. Otherwise it
3416 should be converted in convert_reg. */
3418 FOR_EACH_INSN_USE (ref, insn)
3419 if (DF_REF_REGNO (ref) == REGNO (*op))
3421 gcc_assert (!DF_REF_CHAIN (ref));
3424 *op = gen_rtx_SUBREG (V2DImode, *op, 0);
/* Anything else must already be a V2DImode subreg.  */
3428 gcc_assert (SUBREG_P (*op));
3429 gcc_assert (GET_MODE (*op) == V2DImode);
3433 /* Convert INSN to vector mode. */
3436 scalar_chain::convert_insn (rtx_insn *insn)
3438 rtx def_set = single_set (insn);
3439 rtx src = SET_SRC (def_set);
3440 rtx dst = SET_DEST (def_set);
/* Vector ops can't store straight to memory here: compute into a
   temporary and emit the scalar store afterwards.  */
3443 if (MEM_P (dst) && !REG_P (src))
3445 /* There are no scalar integer instructions and therefore
3446 temporary register usage is required. */
3447 rtx tmp = gen_reg_rtx (DImode);
3448 emit_conversion_insns (gen_move_insn (dst, tmp), insn);
3449 dst = gen_rtx_SUBREG (V2DImode, tmp, 0);
/* Dispatch on SRC's code (case labels elided in this listing):
   binary ops convert both operands, unary/move cases convert SRC.  */
3452 switch (GET_CODE (src))
3459 convert_op (&XEXP (src, 0), insn);
3460 convert_op (&XEXP (src, 1), insn);
3461 PUT_MODE (src, V2DImode);
3466 convert_op (&src, insn);
3473 gcc_assert (GET_MODE (src) == V2DImode);
/* COMPARE case: rebuild the zero-test as a vector ptest-style
   UNSPEC on the interleaved value.  */
3477 src = SUBREG_REG (XEXP (XEXP (src, 0), 0));
3479 gcc_assert ((REG_P (src) && GET_MODE (src) == DImode)
3480 || (SUBREG_P (src) && GET_MODE (src) == V2DImode));
3483 subreg = gen_rtx_SUBREG (V2DImode, src, 0);
3485 subreg = copy_rtx_if_shared (src);
3486 emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
3487 copy_rtx_if_shared (subreg),
3488 copy_rtx_if_shared (subreg)),
3490 dst = gen_rtx_REG (CCmode, FLAGS_REG);
3491 src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (src),
3492 copy_rtx_if_shared (src)),
/* Install the rewritten SET and force re-recognition.  */
3500 SET_SRC (def_set) = src;
3501 SET_DEST (def_set) = dst;
3503 /* Drop possible dead definitions. */
3504 PATTERN (insn) = def_set;
3506 INSN_CODE (insn) = -1;
3507 recog_memoized (insn);
3508 df_insn_rescan (insn);
3511 /* Convert whole chain creating required register
3512 conversions and copies. */
3515 scalar_chain::convert ()
3519 int converted_insns = 0;
/* Debug counter: allow bisecting miscompiles by limiting how many
   chains are converted.  */
3521 if (!dbg_cnt (stv_conversion))
3525 fprintf (dump_file, "Converting chain #%d...\n", chain_id);
/* Convert every register defined in the chain, then make vector
   copies for dual-mode regs defined outside it.  */
3527 EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi)
3530 EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi)
3531 make_vector_copies (id);
3533 EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
3535 convert_insn (DF_INSN_UID_GET (id)->insn);
3539 return converted_insns;
3542 /* Main STV pass function. Find and convert scalar
3543 instructions into vector mode when profitable. */
3546 convert_scalars_to_vector ()
3550 int converted_insns = 0;
3552 bitmap_obstack_initialize (NULL);
3553 candidates = BITMAP_ALLOC (NULL);
/* STV needs dominators plus DU/UD chains from the df framework.  */
3555 calculate_dominance_info (CDI_DOMINATORS);
3556 df_set_flags (DF_DEFER_INSN_RESCAN);
3557 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
3558 df_md_add_problem ();
3561 /* Find all instructions we want to convert into vector mode. */
3563 fprintf (dump_file, "Searching for mode conversion candidates...\n");
3565 FOR_EACH_BB_FN (bb, cfun)
3568 FOR_BB_INSNS (bb, insn)
3569 if (scalar_to_vector_candidate_p (insn))
3572 fprintf (dump_file, " insn %d is marked as a candidate\n",
3575 bitmap_set_bit (candidates, INSN_UID (insn));
/* Drop candidates whose destination also has non-convertible defs.  */
3579 remove_non_convertible_regs (candidates);
3581 if (bitmap_empty_p (candidates))
3583 fprintf (dump_file, "There are no candidates for optimization.\n");
/* Greedily build a chain from each remaining candidate and convert
   it only when the estimated gain is positive.  */
3585 while (!bitmap_empty_p (candidates))
3587 unsigned uid = bitmap_first_set_bit (candidates);
3590 /* Find instructions chain we want to convert to vector mode.
3591 Check all uses and definitions to estimate all required
3593 chain.build (candidates, uid);
3595 if (chain.compute_convert_gain () > 0)
3596 converted_insns += chain.convert ();
3599 fprintf (dump_file, "Chain #%d conversion is not profitable\n",
3604 fprintf (dump_file, "Total insns converted: %d\n", converted_insns);
3606 BITMAP_FREE (candidates);
3607 bitmap_obstack_release (NULL);
3608 df_process_deferred_rescans ();
3610 /* Conversion means we may have 128bit register spills/fills
3611 which require aligned stack. */
3612 if (converted_insns)
3614 if (crtl->stack_alignment_needed < 128)
3615 crtl->stack_alignment_needed = 128;
3616 if (crtl->stack_alignment_estimated < 128)
3617 crtl->stack_alignment_estimated = 128;
/* Pass descriptor and opt_pass wrapper for the vzeroupper-insertion
   RTL pass; the real work is in rest_of_handle_insert_vzeroupper.  */
3625 const pass_data pass_data_insert_vzeroupper =
3627 RTL_PASS, /* type */
3628 "vzeroupper", /* name */
3629 OPTGROUP_NONE, /* optinfo_flags */
3630 TV_NONE, /* tv_id */
3631 0, /* properties_required */
3632 0, /* properties_provided */
3633 0, /* properties_destroyed */
3634 0, /* todo_flags_start */
3635 TODO_df_finish, /* todo_flags_finish */
3638 class pass_insert_vzeroupper : public rtl_opt_pass
3641 pass_insert_vzeroupper(gcc::context *ctxt)
3642 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
3645 /* opt_pass methods: */
/* Run only for AVX (not AVX512F, whose context differs) when
   -mvzeroupper and expensive optimizations are on; the remainder of
   the condition is elided in this listing.  */
3646 virtual bool gate (function *)
3648 return TARGET_AVX && !TARGET_AVX512F
3649 && TARGET_VZEROUPPER && flag_expensive_optimizations
3653 virtual unsigned int execute (function *)
3655 return rest_of_handle_insert_vzeroupper ();
3658 }; // class pass_insert_vzeroupper
/* Pass descriptor and opt_pass wrapper for the scalar-to-vector (STV)
   pass; the real work is in convert_scalars_to_vector.  */
3660 const pass_data pass_data_stv =
3662 RTL_PASS, /* type */
3664 OPTGROUP_NONE, /* optinfo_flags */
3665 TV_NONE, /* tv_id */
3666 0, /* properties_required */
3667 0, /* properties_provided */
3668 0, /* properties_destroyed */
3669 0, /* todo_flags_start */
3670 TODO_df_finish, /* todo_flags_finish */
3673 class pass_stv : public rtl_opt_pass
3676 pass_stv (gcc::context *ctxt)
3677 : rtl_opt_pass (pass_data_stv, ctxt)
3680 /* opt_pass methods: */
/* STV only targets 32-bit DImode code, needs SSE2, and is gated on
   -mstv plus -O2 or higher.  */
3681 virtual bool gate (function *)
3683 return !TARGET_64BIT && TARGET_STV && TARGET_SSE2 && optimize > 1;
3686 virtual unsigned int execute (function *)
3688 return convert_scalars_to_vector ();
3691 }; // class pass_stv
/* Factory functions used by the pass manager to instantiate the two
   passes above (return-type lines elided in this listing).  */
3696 make_pass_insert_vzeroupper (gcc::context *ctxt)
3698 return new pass_insert_vzeroupper (ctxt);
3702 make_pass_stv (gcc::context *ctxt)
3704 return new pass_stv (ctxt);
3707 /* Return true if a red-zone is in use. */
3710 ix86_using_red_zone (void)
/* The 64-bit MS ABI has no red zone; otherwise honor -mred-zone.  */
3712 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
3715 /* Return a string that documents the current -m options. The caller is
3716 responsible for freeing the string. */
3719 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
3720 const char *tune, enum fpmath_unit fpmath,
3723 struct ix86_target_opts
3725 const char *option; /* option string */
3726 HOST_WIDE_INT mask; /* isa mask options */
3729 /* This table is ordered so that options like -msse4.2 that imply
3730 preceding options while match those first. */
3731 static struct ix86_target_opts isa_opts[] =
3733 { "-mfma4", OPTION_MASK_ISA_FMA4 },
3734 { "-mfma", OPTION_MASK_ISA_FMA },
3735 { "-mxop", OPTION_MASK_ISA_XOP },
3736 { "-mlwp", OPTION_MASK_ISA_LWP },
3737 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
3738 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
3739 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
3740 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
3741 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
3742 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
3743 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
3744 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
3745 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
3746 { "-msse4a", OPTION_MASK_ISA_SSE4A },
3747 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
3748 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
3749 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
3750 { "-msse3", OPTION_MASK_ISA_SSE3 },
3751 { "-msse2", OPTION_MASK_ISA_SSE2 },
3752 { "-msse", OPTION_MASK_ISA_SSE },
3753 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
3754 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
3755 { "-mmmx", OPTION_MASK_ISA_MMX },
3756 { "-mabm", OPTION_MASK_ISA_ABM },
3757 { "-mbmi", OPTION_MASK_ISA_BMI },
3758 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
3759 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
3760 { "-mhle", OPTION_MASK_ISA_HLE },
3761 { "-mfxsr", OPTION_MASK_ISA_FXSR },
3762 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
3763 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
3764 { "-madx", OPTION_MASK_ISA_ADX },
3765 { "-mtbm", OPTION_MASK_ISA_TBM },
3766 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
3767 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
3768 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
3769 { "-maes", OPTION_MASK_ISA_AES },
3770 { "-msha", OPTION_MASK_ISA_SHA },
3771 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
3772 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
3773 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
3774 { "-mf16c", OPTION_MASK_ISA_F16C },
3775 { "-mrtm", OPTION_MASK_ISA_RTM },
3776 { "-mxsave", OPTION_MASK_ISA_XSAVE },
3777 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
3778 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
3779 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
3780 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
3781 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
3782 { "-mmpx", OPTION_MASK_ISA_MPX },
3783 { "-mclwb", OPTION_MASK_ISA_CLWB },
3784 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
3785 { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
3786 { "-mclzero", OPTION_MASK_ISA_CLZERO },
3787 { "-mpku", OPTION_MASK_ISA_PKU },
/* Table mapping each target_flags MASK_* bit to the command-line switch
   that sets it; used when reconstructing an option string for diagnostics.
   NOTE(review): the closing brace of this initializer is elided in this
   extract — confirm against the full file.  */
3791 static struct ix86_target_opts flag_opts[] =
3793 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
3794 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
3795 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
3796 { "-m80387", MASK_80387 },
3797 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
3798 { "-malign-double", MASK_ALIGN_DOUBLE },
3799 { "-mcld", MASK_CLD },
3800 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
3801 { "-mieee-fp", MASK_IEEE_FP },
3802 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
3803 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
3804 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
3805 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
3806 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
3807 { "-mno-push-args", MASK_NO_PUSH_ARGS },
3808 { "-mno-red-zone", MASK_NO_RED_ZONE },
3809 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
3810 { "-mrecip", MASK_RECIP },
3811 { "-mrtd", MASK_RTD },
3812 { "-msseregparm", MASK_SSEREGPARM },
3813 { "-mstack-arg-probe", MASK_STACK_PROBE },
3814 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
3815 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
3816 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
3817 { "-mvzeroupper", MASK_VZEROUPPER },
3818 { "-mstv", MASK_STV},
3819 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
3820 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
3821 { "-mprefer-avx128", MASK_PREFER_AVX128},
/* Interior of ix86_target_string: builds a heap-allocated (xmalloc) string
   describing the given isa/flags/arch/tune/fpmath settings, by collecting
   option name/value pairs into OPTS and then concatenating them.
   NOTE(review): the function signature and several interior lines are
   elided in this extract; comments below describe only the visible code.  */
/* OPTS holds [option-prefix, option-value] pairs; +6 leaves room for the
   non-table entries (-march=, -mtune=, ABI switch, fpmath, "other" slots).  */
3824 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
3827 char target_other[40];
3837 memset (opts, '\0', sizeof (opts));
3839 /* Add -march= option. */
3842 opts[num][0] = "-march=";
3843 opts[num++][1] = arch;
3846 /* Add -mtune= option. */
3849 opts[num][0] = "-mtune=";
3850 opts[num++][1] = tune;
3853 /* Add -m32/-m64/-mx32. */
3854 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
3856 if ((isa & OPTION_MASK_ABI_64) != 0)
/* Strip the ABI bits so they are not reported again as "other isa".  */
3860 isa &= ~ (OPTION_MASK_ISA_64BIT
3861 | OPTION_MASK_ABI_64
3862 | OPTION_MASK_ABI_X32);
3866 opts[num++][0] = abi;
3868 /* Pick out the options in isa options. */
3869 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
3871 if ((isa & isa_opts[i].mask) != 0)
3873 opts[num++][0] = isa_opts[i].option;
3874 isa &= ~ isa_opts[i].mask;
/* Any isa bits not matched by the table are reported collectively.  */
3878 if (isa && add_nl_p)
3880 opts[num++][0] = isa_other;
3881 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
3885 /* Add flag options. */
3886 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
3888 if ((flags & flag_opts[i].mask) != 0)
3890 opts[num++][0] = flag_opts[i].option;
3891 flags &= ~ flag_opts[i].mask;
3895 if (flags && add_nl_p)
3897 opts[num++][0] = target_other;
3898 sprintf (target_other, "(other flags: %#x)", flags);
3901 /* Add -fpmath= option. */
3904 opts[num][0] = "-mfpmath=";
3905 switch ((int) fpmath)
3908 opts[num++][1] = "387";
3912 opts[num++][1] = "sse";
3915 case FPMATH_387 | FPMATH_SSE:
3916 opts[num++][1] = "sse+387";
3928 gcc_assert (num < ARRAY_SIZE (opts));
3930 /* Size the string. */
/* Separator is ",\n\t" (3 chars) when wrapping lines, else " " (1).  */
3932 sep_len = (add_nl_p) ? 3 : 1;
3933 for (i = 0; i < num; i++)
3936 for (j = 0; j < 2; j++)
3938 len += strlen (opts[i][j]);
3941 /* Build the string. */
3942 ret = ptr = (char *) xmalloc (len);
3945 for (i = 0; i < num; i++)
3949 for (j = 0; j < 2; j++)
3950 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
/* Wrap the output line when it would exceed ~70 columns.  */
3957 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3965 for (j = 0; j < 2; j++)
3968 memcpy (ptr, opts[i][j], len2[j]);
3970 line_len += len2[j];
/* Sanity check: we must not have written past the sized buffer.  */
3975 gcc_assert (ret + len >= ptr);
3980 /* Return true, if profiling code should be emitted before
3981 prologue. Otherwise it returns false.
3982 Note: For x86 with "hotfix" it is sorried. */
3984 ix86_profile_before_prologue (void)
/* -mfentry requests the profiling call (__fentry__) before the prologue.  */
3986 return flag_fentry != 0;
3989 /* Function that is callable from the debugger to print the current
/* Debugger helper: prints the current target option string to stderr.
   ATTRIBUTE_UNUSED because nothing in the compiler calls it directly.  */
3991 void ATTRIBUTE_UNUSED
3992 ix86_debug_options (void)
3994 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
3995 ix86_arch_string, ix86_tune_string,
/* ix86_target_string returns an xmalloc'd string (or NULL).  */
4000 fprintf (stderr, "%s\n\n", opts);
4004 fputs ("<no options>\n\n", stderr);
4009 /* Return true if T is one of the bytes we should avoid with
4013 ix86_rop_should_change_byte_p (int t)
/* 0xc2/0xc3 are near RET (imm16 / plain); 0xca/0xcb are far RET —
   bytes usable as ROP gadget terminators.  */
4015 return t == 0xc2 || t == 0xc3 || t == 0xca || t == 0xcb;
/* Names of the string-operation algorithms, generated from stringop.def;
   indexed by enum stringop_alg and used when parsing -mstringop-strategy
   style options.  */
4018 static const char *stringop_alg_names[] = {
4020 #define DEF_ALG(alg, name) #name,
4021 #include "stringop.def"
4026 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
4027 The string is of the following form (or comma separated list of it):
4029 strategy_alg:max_size:[align|noalign]
4031 where the full size range for the strategy is either [0, max_size] or
4032 [min_size, max_size], in which min_size is the max_size + 1 of the
4033 preceding range. The last size range must have max_size == -1.
4038 -mmemcpy-strategy=libcall:-1:noalign
4040 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
4044 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
4046 This is to tell the compiler to use the following strategy for memset
4047 1) when the expected size is between [1, 16], use rep_8byte strategy;
4048 2) when the size is between [17, 2048], use vector_loop;
4049 3) when the size is > 2048, use libcall. */
/* One parsed size range: upper bound, chosen algorithm, alignment flag.  */
4051 struct stringop_size_range
/* Parses STRATEGY_STR in place (commas are overwritten with NULs) and,
   on success, overwrites the cost-table entry for memset (IS_MEMSET) or
   memcpy.  Emits error() and returns early on malformed input.
   NOTE(review): declarations of alg_name/maxs/align/n/i and several
   braces are elided in this extract.  */
4059 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
4061 const struct stringop_algs *default_algs;
4062 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
4063 char *curr_range_str, *next_range_str;
/* The table to override depends on the operation and on 32- vs 64-bit.  */
4067 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
4069 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
4071 curr_range_str = strategy_str;
/* Split on commas, one range per iteration.  */
4078 next_range_str = strchr (curr_range_str, ',');
4080 *next_range_str++ = '\0';
/* Field widths (%20 / %10) bound the writes into alg_name/align.  */
4082 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
4083 alg_name, &maxs, align))
4085 error ("wrong arg %s to option %s", curr_range_str,
4086 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* Ranges must be strictly increasing, except -1 meaning "unbounded".  */
4090 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
4092 error ("size ranges of option %s should be increasing",
4093 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* Translate the algorithm name into its enum index.  */
4097 for (i = 0; i < last_alg; i++)
4098 if (!strcmp (alg_name, stringop_alg_names[i]))
4103 error ("wrong stringop strategy name %s specified for option %s",
4105 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4109 if ((stringop_alg) i == rep_prefix_8_byte
4112 /* rep; movq isn't available in 32-bit code. */
4113 error ("stringop strategy name %s specified for option %s "
4114 "not supported for 32-bit code",
4116 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4120 input_ranges[n].max = maxs;
4121 input_ranges[n].alg = (stringop_alg) i;
4122 if (!strcmp (align, "align"))
4123 input_ranges[n].noalign = false;
4124 else if (!strcmp (align, "noalign"))
4125 input_ranges[n].noalign = true;
4128 error ("unknown alignment %s specified for option %s",
4129 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4133 curr_range_str = next_range_str;
4135 while (curr_range_str);
/* The final range must be open-ended (max == -1).  */
4137 if (input_ranges[n - 1].max != -1)
4139 error ("the max value for the last size range should be -1"
4141 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4145 if (n > MAX_STRINGOP_ALGS)
4147 error ("too many size ranges specified in option %s",
4148 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4152 /* Now override the default algs array. */
/* The cost tables are const; const_cast is used deliberately to patch
   them with the user-specified strategy.  */
4153 for (i = 0; i < n; i++)
4155 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
4156 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
4157 = input_ranges[i].alg;
4158 *const_cast<int *>(&default_algs->size[i].noalign)
4159 = input_ranges[i].noalign;
4164 /* parse -mtune-ctrl= option. When DUMP is true,
4165 print the features that are explicitly set. */
4168 parse_mtune_ctrl_str (bool dump)
/* Nothing to do unless -mtune-ctrl= was given.  */
4170 if (!ix86_tune_ctrl_string)
4173 char *next_feature_string = NULL;
/* Work on a writable copy; ORIG keeps the pointer for the eventual free
   (the free itself is elided from this extract).  */
4174 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
4175 char *orig = curr_feature_string;
/* Comma-separated list; each entry may be prefixed with '^' to clear
   the feature instead of setting it.  */
4181 next_feature_string = strchr (curr_feature_string, ',');
4182 if (next_feature_string)
4183 *next_feature_string++ = '\0';
4184 if (*curr_feature_string == '^')
4186 curr_feature_string++;
4189 for (i = 0; i < X86_TUNE_LAST; i++)
4191 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
4193 ix86_tune_features[i] = !clear;
4195 fprintf (stderr, "Explicitly %s feature %s\n",
4196 clear ? "clear" : "set", ix86_tune_feature_names[i]);
/* Loop ran off the end => no feature name matched.  The "- 1" restores
   the '^' prefix for the error message.  */
4200 if (i == X86_TUNE_LAST)
4201 error ("Unknown parameter to option -mtune-ctrl: %s",
4202 clear ? curr_feature_string - 1 : curr_feature_string)
4203 curr_feature_string = next_feature_string;
4205 while (curr_feature_string);
4209 /* Helper function to set ix86_tune_features. IX86_TUNE is the
/* Initializes ix86_tune_features from the per-processor bitmask table,
   optionally dumping the result, then applies -mtune-ctrl overrides.  */
4213 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
/* Each table entry is a bitmask over processors; select our bit.  */
4215 unsigned int ix86_tune_mask = 1u << ix86_tune;
4218 for (i = 0; i < X86_TUNE_LAST; ++i)
/* -mtune-ctrl with no defaults: start every feature off.  */
4220 if (ix86_tune_no_default)
4221 ix86_tune_features[i] = 0;
4223 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4228 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
4229 for (i = 0; i < X86_TUNE_LAST; i++)
4230 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
4231 ix86_tune_features[i] ? "on" : "off");
/* Explicit -mtune-ctrl= settings override the table defaults.  */
4234 parse_mtune_ctrl_str (dump);
4238 /* Default align_* from the processor table. */
/* Fills in align_loops/align_jumps/align_functions (and the matching
   max-skip values) from the tuning processor's table entry, but only
   where the user left them at 0 (i.e. unset).  */
4241 ix86_default_align (struct gcc_options *opts)
4243 if (opts->x_align_loops == 0)
4245 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
4246 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
4248 if (opts->x_align_jumps == 0)
4250 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
4251 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
4253 if (opts->x_align_functions == 0)
4255 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
4259 /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
4262 ix86_override_options_after_change (void)
/* Re-derive the alignment defaults against the global option set.  */
4264 ix86_default_align (&global_options);
4267 /* Override various settings based on options. If MAIN_ARGS_P, the
4268 options are from the command line, otherwise they are from
4272 ix86_option_override_internal (bool main_args_p,
4273 struct gcc_options *opts,
4274 struct gcc_options *opts_set)
4277 unsigned int ix86_arch_mask;
4278 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
4283 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
4284 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
4285 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
4286 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
4287 #define PTA_AES (HOST_WIDE_INT_1 << 4)
4288 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
4289 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
4290 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
4291 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
4292 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
4293 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
4294 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
4295 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
4296 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
4297 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
4298 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
4299 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
4300 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
4301 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
4302 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
4303 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
4304 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
4305 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
4306 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
4307 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
4308 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
4309 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
4310 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
4311 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
4312 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
4313 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
4314 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
4315 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
4316 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
4317 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
4318 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
4319 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
4320 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
4321 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
4322 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
4323 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
4324 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
4325 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
4326 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
4327 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
4328 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
4329 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
4330 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
4331 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
4332 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
4333 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
4334 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
4335 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
4336 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
4337 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
4338 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
4339 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
4340 #define PTA_MWAITX (HOST_WIDE_INT_1 << 57)
4341 #define PTA_CLZERO (HOST_WIDE_INT_1 << 58)
4342 #define PTA_NO_80387 (HOST_WIDE_INT_1 << 59)
4343 #define PTA_PKU (HOST_WIDE_INT_1 << 60)
4346 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
4347 | PTA_CX16 | PTA_FXSR)
4348 #define PTA_NEHALEM \
4349 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
4350 #define PTA_WESTMERE \
4351 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
4352 #define PTA_SANDYBRIDGE \
4353 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
4354 #define PTA_IVYBRIDGE \
4355 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
4356 #define PTA_HASWELL \
4357 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
4358 | PTA_FMA | PTA_MOVBE | PTA_HLE)
4359 #define PTA_BROADWELL \
4360 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
4361 #define PTA_SKYLAKE \
4362 (PTA_BROADWELL | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES)
4363 #define PTA_SKYLAKE_AVX512 \
4364 (PTA_SKYLAKE | PTA_AVX512F | PTA_AVX512CD | PTA_AVX512VL \
4365 | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU)
4367 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
4368 #define PTA_BONNELL \
4369 (PTA_CORE2 | PTA_MOVBE)
4370 #define PTA_SILVERMONT \
4371 (PTA_WESTMERE | PTA_MOVBE)
4373 /* if this reaches 64, need to widen struct pta flags below */
4377 const char *const name; /* processor name or nickname. */
4378 const enum processor_type processor;
4379 const enum attr_cpu schedule;
4380 const unsigned HOST_WIDE_INT flags;
4382 const processor_alias_table[] =
4384 {"i386", PROCESSOR_I386, CPU_NONE, 0},
4385 {"i486", PROCESSOR_I486, CPU_NONE, 0},
4386 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
4387 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
4388 {"lakemont", PROCESSOR_LAKEMONT, CPU_PENTIUM, PTA_NO_80387},
4389 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
4390 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
4391 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4392 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4393 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4394 PTA_MMX | PTA_SSE | PTA_FXSR},
4395 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
4396 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
4397 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
4398 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4399 PTA_MMX | PTA_SSE | PTA_FXSR},
4400 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4401 PTA_MMX | PTA_SSE | PTA_FXSR},
4402 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4403 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
4404 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
4405 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
4406 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
4407 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
4408 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
4409 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
4410 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
4411 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4412 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
4413 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
4414 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
4415 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
4416 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
4417 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4419 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4421 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4423 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4425 {"haswell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
4426 {"core-avx2", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
4427 {"broadwell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_BROADWELL},
4428 {"skylake", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE},
4429 {"skylake-avx512", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE_AVX512},
4430 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
4431 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
4432 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
4433 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
4434 {"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL},
4435 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
4436 {"geode", PROCESSOR_GEODE, CPU_GEODE,
4437 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
4438 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
4439 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4440 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4441 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
4442 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
4443 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
4444 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
4445 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
4446 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
4447 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
4448 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
4449 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
4450 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
4451 {"x86-64", PROCESSOR_K8, CPU_K8,
4452 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
4453 {"k8", PROCESSOR_K8, CPU_K8,
4454 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4455 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4456 {"k8-sse3", PROCESSOR_K8, CPU_K8,
4457 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4458 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4459 {"opteron", PROCESSOR_K8, CPU_K8,
4460 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4461 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4462 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
4463 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4464 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4465 {"athlon64", PROCESSOR_K8, CPU_K8,
4466 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4467 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4468 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
4469 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4470 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4471 {"athlon-fx", PROCESSOR_K8, CPU_K8,
4472 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4473 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4474 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
4475 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
4476 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
4477 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
4478 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
4479 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
4480 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
4481 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4482 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4483 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
4484 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
4485 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
4486 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4487 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4488 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
4489 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
4490 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
4491 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
4492 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4493 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4494 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
4495 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
4496 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
4497 | PTA_XSAVEOPT | PTA_FSGSBASE},
4498 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
4499 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4500 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4501 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
4502 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
4503 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
4504 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
4505 | PTA_MOVBE | PTA_MWAITX},
4506 {"znver1", PROCESSOR_ZNVER1, CPU_ZNVER1,
4507 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4508 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4509 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
4510 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW
4511 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
4512 | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
4513 | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES
4514 | PTA_SHA | PTA_LZCNT | PTA_POPCNT},
4515 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
4516 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4517 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
4518 | PTA_FXSR | PTA_XSAVE},
4519 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
4520 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4521 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
4522 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
4523 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
4524 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
4526 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
4528 | PTA_HLE /* flags are only used for -march switch. */ },
4531 /* -mrecip options. */
4534 const char *string; /* option name */
4535 unsigned int mask; /* mask bits to set */
4537 const recip_options[] =
4539 { "all", RECIP_MASK_ALL },
4540 { "none", RECIP_MASK_NONE },
4541 { "div", RECIP_MASK_DIV },
4542 { "sqrt", RECIP_MASK_SQRT },
4543 { "vec-div", RECIP_MASK_VEC_DIV },
4544 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
4547 int const pta_size = ARRAY_SIZE (processor_alias_table);
4549 /* Set up prefix/suffix so the error messages refer to either the command
4550 line argument, or the attribute(target). */
4559 prefix = "option(\"";
4564 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
4565 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
4566 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
4567 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
4568 #ifdef TARGET_BI_ARCH
4571 #if TARGET_BI_ARCH == 1
4572 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
4573 is on and OPTION_MASK_ABI_X32 is off. We turn off
4574 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
4576 if (TARGET_X32_P (opts->x_ix86_isa_flags))
4577 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
4579 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
4580 on and OPTION_MASK_ABI_64 is off. We turn off
4581 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
4582 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
4583 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
4584 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
4585 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
4587 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4588 && TARGET_IAMCU_P (opts->x_target_flags))
4589 sorry ("Intel MCU psABI isn%'t supported in %s mode",
4590 TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit");
4594 if (TARGET_X32_P (opts->x_ix86_isa_flags))
4596 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
4597 OPTION_MASK_ABI_64 for TARGET_X32. */
4598 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
4599 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
4601 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
4602 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
4603 | OPTION_MASK_ABI_X32
4604 | OPTION_MASK_ABI_64);
4605 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
4607 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
4608 OPTION_MASK_ABI_X32 for TARGET_LP64. */
4609 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
4610 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
4613 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4614 SUBTARGET_OVERRIDE_OPTIONS;
4617 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4618 SUBSUBTARGET_OVERRIDE_OPTIONS;
4621 /* -fPIC is the default for x86_64. */
4622 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
4623 opts->x_flag_pic = 2;
4625 /* Need to check -mtune=generic first. */
4626 if (opts->x_ix86_tune_string)
4628 /* As special support for cross compilers we read -mtune=native
4629 as -mtune=generic. With native compilers we won't see the
4630 -mtune=native, as it was changed by the driver. */
4631 if (!strcmp (opts->x_ix86_tune_string, "native"))
4633 opts->x_ix86_tune_string = "generic";
4635 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
4636 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
4637 "%stune=k8%s or %stune=generic%s instead as appropriate",
4638 prefix, suffix, prefix, suffix, prefix, suffix);
4642 if (opts->x_ix86_arch_string)
4643 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
4644 if (!opts->x_ix86_tune_string)
4646 opts->x_ix86_tune_string
4647 = processor_target_table[TARGET_CPU_DEFAULT].name;
4648 ix86_tune_defaulted = 1;
4651 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
4652 or defaulted. We need to use a sensible tune option. */
4653 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
4655 opts->x_ix86_tune_string = "generic";
4659 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
4660 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
4662 /* rep; movq isn't available in 32-bit code. */
4663 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
4664 opts->x_ix86_stringop_alg = no_stringop;
4667 if (!opts->x_ix86_arch_string)
4668 opts->x_ix86_arch_string
4669 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
4670 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
4672 ix86_arch_specified = 1;
4674 if (opts_set->x_ix86_pmode)
4676 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
4677 && opts->x_ix86_pmode == PMODE_SI)
4678 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4679 && opts->x_ix86_pmode == PMODE_DI))
4680 error ("address mode %qs not supported in the %s bit mode",
4681 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
4682 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
4685 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
4686 ? PMODE_DI : PMODE_SI;
4688 if (!opts_set->x_ix86_abi)
4689 opts->x_ix86_abi = DEFAULT_ABI;
4691 /* For targets using ms ABI enable ms-extensions, if not
4692 explicit turned off. For non-ms ABI we turn off this
4694 if (!opts_set->x_flag_ms_extensions)
4695 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
4697 if (opts_set->x_ix86_cmodel)
4699 switch (opts->x_ix86_cmodel)
4703 if (opts->x_flag_pic)
4704 opts->x_ix86_cmodel = CM_SMALL_PIC;
4705 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4706 error ("code model %qs not supported in the %s bit mode",
4712 if (opts->x_flag_pic)
4713 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
4714 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4715 error ("code model %qs not supported in the %s bit mode",
4717 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
4718 error ("code model %qs not supported in x32 mode",
4724 if (opts->x_flag_pic)
4725 opts->x_ix86_cmodel = CM_LARGE_PIC;
4726 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4727 error ("code model %qs not supported in the %s bit mode",
4729 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
4730 error ("code model %qs not supported in x32 mode",
4735 if (opts->x_flag_pic)
4736 error ("code model %s does not support PIC mode", "32");
4737 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4738 error ("code model %qs not supported in the %s bit mode",
4743 if (opts->x_flag_pic)
4745 error ("code model %s does not support PIC mode", "kernel");
4746 opts->x_ix86_cmodel = CM_32;
4748 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4749 error ("code model %qs not supported in the %s bit mode",
4759 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
4760 use of rip-relative addressing. This eliminates fixups that
4761 would otherwise be needed if this object is to be placed in a
4762 DLL, and is essentially just as efficient as direct addressing. */
4763 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4764 && (TARGET_RDOS || TARGET_PECOFF))
4765 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
4766 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4767 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
4769 opts->x_ix86_cmodel = CM_32;
4771 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
4773 error ("-masm=intel not supported in this configuration");
4774 opts->x_ix86_asm_dialect = ASM_ATT;
4776 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
4777 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
4778 sorry ("%i-bit mode not compiled in",
4779 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
4781 for (i = 0; i < pta_size; i++)
4782 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
4784 ix86_schedule = processor_alias_table[i].schedule;
4785 ix86_arch = processor_alias_table[i].processor;
4786 /* Default cpu tuning to the architecture. */
4787 ix86_tune = ix86_arch;
4789 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4790 && !(processor_alias_table[i].flags & PTA_64BIT))
4791 error ("CPU you selected does not support x86-64 "
4794 if (processor_alias_table[i].flags & PTA_MMX
4795 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
4796 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
4797 if (processor_alias_table[i].flags & PTA_3DNOW
4798 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
4799 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
4800 if (processor_alias_table[i].flags & PTA_3DNOW_A
4801 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
4802 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
4803 if (processor_alias_table[i].flags & PTA_SSE
4804 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
4805 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
4806 if (processor_alias_table[i].flags & PTA_SSE2
4807 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
4808 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
4809 if (processor_alias_table[i].flags & PTA_SSE3
4810 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
4811 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
4812 if (processor_alias_table[i].flags & PTA_SSSE3
4813 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
4814 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
4815 if (processor_alias_table[i].flags & PTA_SSE4_1
4816 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
4817 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
4818 if (processor_alias_table[i].flags & PTA_SSE4_2
4819 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
4820 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
4821 if (processor_alias_table[i].flags & PTA_AVX
4822 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
4823 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
4824 if (processor_alias_table[i].flags & PTA_AVX2
4825 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
4826 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
4827 if (processor_alias_table[i].flags & PTA_FMA
4828 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
4829 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
4830 if (processor_alias_table[i].flags & PTA_SSE4A
4831 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
4832 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
4833 if (processor_alias_table[i].flags & PTA_FMA4
4834 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
4835 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
4836 if (processor_alias_table[i].flags & PTA_XOP
4837 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
4838 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
4839 if (processor_alias_table[i].flags & PTA_LWP
4840 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
4841 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
4842 if (processor_alias_table[i].flags & PTA_ABM
4843 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
4844 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
4845 if (processor_alias_table[i].flags & PTA_BMI
4846 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
4847 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
4848 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
4849 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
4850 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
4851 if (processor_alias_table[i].flags & PTA_TBM
4852 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
4853 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
4854 if (processor_alias_table[i].flags & PTA_BMI2
4855 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
4856 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
4857 if (processor_alias_table[i].flags & PTA_CX16
4858 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
4859 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
4860 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
4861 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
4862 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
4863 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
4864 && (processor_alias_table[i].flags & PTA_NO_SAHF))
4865 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
4866 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
4867 if (processor_alias_table[i].flags & PTA_MOVBE
4868 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
4869 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
4870 if (processor_alias_table[i].flags & PTA_AES
4871 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
4872 ix86_isa_flags |= OPTION_MASK_ISA_AES;
4873 if (processor_alias_table[i].flags & PTA_SHA
4874 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
4875 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
4876 if (processor_alias_table[i].flags & PTA_PCLMUL
4877 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
4878 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
4879 if (processor_alias_table[i].flags & PTA_FSGSBASE
4880 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
4881 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
4882 if (processor_alias_table[i].flags & PTA_RDRND
4883 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
4884 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
4885 if (processor_alias_table[i].flags & PTA_F16C
4886 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
4887 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
4888 if (processor_alias_table[i].flags & PTA_RTM
4889 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
4890 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
4891 if (processor_alias_table[i].flags & PTA_HLE
4892 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
4893 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
4894 if (processor_alias_table[i].flags & PTA_PRFCHW
4895 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
4896 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
4897 if (processor_alias_table[i].flags & PTA_RDSEED
4898 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
4899 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
4900 if (processor_alias_table[i].flags & PTA_ADX
4901 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
4902 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
4903 if (processor_alias_table[i].flags & PTA_FXSR
4904 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
4905 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
4906 if (processor_alias_table[i].flags & PTA_XSAVE
4907 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
4908 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
4909 if (processor_alias_table[i].flags & PTA_XSAVEOPT
4910 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
4911 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
4912 if (processor_alias_table[i].flags & PTA_AVX512F
4913 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
4914 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
4915 if (processor_alias_table[i].flags & PTA_AVX512ER
4916 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
4917 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
4918 if (processor_alias_table[i].flags & PTA_AVX512PF
4919 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
4920 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
4921 if (processor_alias_table[i].flags & PTA_AVX512CD
4922 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
4923 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
4924 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
4925 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
4926 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
4927 if (processor_alias_table[i].flags & PTA_PCOMMIT
4928 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
4929 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
4930 if (processor_alias_table[i].flags & PTA_CLWB
4931 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
4932 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
4933 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
4934 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
4935 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
4936 if (processor_alias_table[i].flags & PTA_CLZERO
4937 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLZERO))
4938 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLZERO;
4939 if (processor_alias_table[i].flags & PTA_XSAVEC
4940 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
4941 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
4942 if (processor_alias_table[i].flags & PTA_XSAVES
4943 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
4944 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
4945 if (processor_alias_table[i].flags & PTA_AVX512DQ
4946 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
4947 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
4948 if (processor_alias_table[i].flags & PTA_AVX512BW
4949 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
4950 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
4951 if (processor_alias_table[i].flags & PTA_AVX512VL
4952 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
4953 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
4954 if (processor_alias_table[i].flags & PTA_MPX
4955 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
4956 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
4957 if (processor_alias_table[i].flags & PTA_AVX512VBMI
4958 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
4959 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
4960 if (processor_alias_table[i].flags & PTA_AVX512IFMA
4961 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
4962 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
4963 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
4964 x86_prefetch_sse = true;
4965 if (processor_alias_table[i].flags & PTA_MWAITX
4966 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX))
4967 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX;
4968 if (processor_alias_table[i].flags & PTA_PKU
4969 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU))
4970 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU;
4972 if (!(opts_set->x_target_flags & MASK_80387))
4974 if (processor_alias_table[i].flags & PTA_NO_80387)
4975 opts->x_target_flags &= ~MASK_80387;
4977 opts->x_target_flags |= MASK_80387;
4982 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
4983 error ("Intel MPX does not support x32");
4985 if (TARGET_X32 && (ix86_isa_flags & OPTION_MASK_ISA_MPX))
4986 error ("Intel MPX does not support x32");
4988 if (!strcmp (opts->x_ix86_arch_string, "generic"))
4989 error ("generic CPU can be used only for %stune=%s %s",
4990 prefix, suffix, sw);
4991 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
4992 error ("intel CPU can be used only for %stune=%s %s",
4993 prefix, suffix, sw);
4994 else if (i == pta_size)
4995 error ("bad value (%s) for %sarch=%s %s",
4996 opts->x_ix86_arch_string, prefix, suffix, sw);
4998 ix86_arch_mask = 1u << ix86_arch;
4999 for (i = 0; i < X86_ARCH_LAST; ++i)
5000 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
5002 for (i = 0; i < pta_size; i++)
5003 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
5005 ix86_schedule = processor_alias_table[i].schedule;
5006 ix86_tune = processor_alias_table[i].processor;
5007 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5009 if (!(processor_alias_table[i].flags & PTA_64BIT))
5011 if (ix86_tune_defaulted)
5013 opts->x_ix86_tune_string = "x86-64";
5014 for (i = 0; i < pta_size; i++)
5015 if (! strcmp (opts->x_ix86_tune_string,
5016 processor_alias_table[i].name))
5018 ix86_schedule = processor_alias_table[i].schedule;
5019 ix86_tune = processor_alias_table[i].processor;
5022 error ("CPU you selected does not support x86-64 "
5026 /* Intel CPUs have always interpreted SSE prefetch instructions as
5027 NOPs; so, we can enable SSE prefetch instructions even when
5028 -mtune (rather than -march) points us to a processor that has them.
5029 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
5030 higher processors. */
5032 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
5033 x86_prefetch_sse = true;
5037 if (ix86_tune_specified && i == pta_size)
5038 error ("bad value (%s) for %stune=%s %s",
5039 opts->x_ix86_tune_string, prefix, suffix, sw);
5041 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
5043 #ifndef USE_IX86_FRAME_POINTER
5044 #define USE_IX86_FRAME_POINTER 0
5047 #ifndef USE_X86_64_FRAME_POINTER
5048 #define USE_X86_64_FRAME_POINTER 0
5051 /* Set the default values for switches whose default depends on TARGET_64BIT
5052 in case they weren't overwritten by command line options. */
5053 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5055 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
5056 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
5057 if (opts->x_flag_asynchronous_unwind_tables
5058 && !opts_set->x_flag_unwind_tables
5059 && TARGET_64BIT_MS_ABI)
5060 opts->x_flag_unwind_tables = 1;
5061 if (opts->x_flag_asynchronous_unwind_tables == 2)
5062 opts->x_flag_unwind_tables
5063 = opts->x_flag_asynchronous_unwind_tables = 1;
5064 if (opts->x_flag_pcc_struct_return == 2)
5065 opts->x_flag_pcc_struct_return = 0;
5069 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
5070 opts->x_flag_omit_frame_pointer
5071 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
5072 if (opts->x_flag_asynchronous_unwind_tables == 2)
5073 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
5074 if (opts->x_flag_pcc_struct_return == 2)
5076 /* Intel MCU psABI specifies that -freg-struct-return should
5077 be on. Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1,
5078 we check -miamcu so that -freg-struct-return is always
5079 turned on if -miamcu is used. */
5080 if (TARGET_IAMCU_P (opts->x_target_flags))
5081 opts->x_flag_pcc_struct_return = 0;
5083 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
5087 ix86_tune_cost = processor_target_table[ix86_tune].cost;
5088 /* TODO: ix86_cost should be chosen at instruction or function granuality
5089 so for cold code we use size_cost even in !optimize_size compilation. */
5090 if (opts->x_optimize_size)
5091 ix86_cost = &ix86_size_cost;
5093 ix86_cost = ix86_tune_cost;
5095 /* Arrange to set up i386_stack_locals for all functions. */
5096 init_machine_status = ix86_init_machine_status;
5098 /* Validate -mregparm= value. */
5099 if (opts_set->x_ix86_regparm)
5101 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5102 warning (0, "-mregparm is ignored in 64-bit mode");
5103 else if (TARGET_IAMCU_P (opts->x_target_flags))
5104 warning (0, "-mregparm is ignored for Intel MCU psABI");
5105 if (opts->x_ix86_regparm > REGPARM_MAX)
5107 error ("-mregparm=%d is not between 0 and %d",
5108 opts->x_ix86_regparm, REGPARM_MAX);
5109 opts->x_ix86_regparm = 0;
5112 if (TARGET_IAMCU_P (opts->x_target_flags)
5113 || TARGET_64BIT_P (opts->x_ix86_isa_flags))
5114 opts->x_ix86_regparm = REGPARM_MAX;
5116 /* Default align_* from the processor table. */
5117 ix86_default_align (opts);
5119 /* Provide default for -mbranch-cost= value. */
5120 if (!opts_set->x_ix86_branch_cost)
5121 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
5123 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5125 opts->x_target_flags
5126 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
5128 /* Enable by default the SSE and MMX builtins. Do allow the user to
5129 explicitly disable any of these. In particular, disabling SSE and
5130 MMX for kernel code is extremely useful. */
5131 if (!ix86_arch_specified)
5132 opts->x_ix86_isa_flags
5133 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
5134 | TARGET_SUBTARGET64_ISA_DEFAULT)
5135 & ~opts->x_ix86_isa_flags_explicit);
5137 if (TARGET_RTD_P (opts->x_target_flags))
5138 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
5142 opts->x_target_flags
5143 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
5145 if (!ix86_arch_specified)
5146 opts->x_ix86_isa_flags
5147 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
5149 /* i386 ABI does not specify red zone. It still makes sense to use it
5150 when programmer takes care to stack from being destroyed. */
5151 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
5152 opts->x_target_flags |= MASK_NO_RED_ZONE;
5155 /* Keep nonleaf frame pointers. */
5156 if (opts->x_flag_omit_frame_pointer)
5157 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
5158 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
5159 opts->x_flag_omit_frame_pointer = 1;
5161 /* If we're doing fast math, we don't care about comparison order
5162 wrt NaNs. This lets us use a shorter comparison sequence. */
5163 if (opts->x_flag_finite_math_only)
5164 opts->x_target_flags &= ~MASK_IEEE_FP;
5166 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
5167 since the insns won't need emulation. */
5168 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
5169 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
5171 /* Likewise, if the target doesn't have a 387, or we've specified
5172 software floating point, don't use 387 inline intrinsics. */
5173 if (!TARGET_80387_P (opts->x_target_flags))
5174 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
5176 /* Turn on MMX builtins for -msse. */
5177 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
5178 opts->x_ix86_isa_flags
5179 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
5181 /* Enable SSE prefetch. */
5182 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
5183 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
5184 x86_prefetch_sse = true;
5186 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
5187 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
5188 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
5189 opts->x_ix86_isa_flags
5190 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
5192 /* Enable popcnt instruction for -msse4.2 or -mabm. */
5193 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
5194 || TARGET_ABM_P (opts->x_ix86_isa_flags))
5195 opts->x_ix86_isa_flags
5196 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
5198 /* Enable lzcnt instruction for -mabm. */
5199 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
5200 opts->x_ix86_isa_flags
5201 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
5203 /* Validate -mpreferred-stack-boundary= value or default it to
5204 PREFERRED_STACK_BOUNDARY_DEFAULT. */
5205 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
5206 if (opts_set->x_ix86_preferred_stack_boundary_arg)
5208 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
5209 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
5210 int max = (TARGET_SEH ? 4 : 12);
5212 if (opts->x_ix86_preferred_stack_boundary_arg < min
5213 || opts->x_ix86_preferred_stack_boundary_arg > max)
5216 error ("-mpreferred-stack-boundary is not supported "
5219 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
5220 opts->x_ix86_preferred_stack_boundary_arg, min, max);
5223 ix86_preferred_stack_boundary
5224 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
5227 /* Set the default value for -mstackrealign. */
5228 if (opts->x_ix86_force_align_arg_pointer == -1)
5229 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
5231 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
5233 /* Validate -mincoming-stack-boundary= value or default it to
5234 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
5235 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
5236 if (opts_set->x_ix86_incoming_stack_boundary_arg)
5238 int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2;
5240 if (opts->x_ix86_incoming_stack_boundary_arg < min
5241 || opts->x_ix86_incoming_stack_boundary_arg > 12)
5242 error ("-mincoming-stack-boundary=%d is not between %d and 12",
5243 opts->x_ix86_incoming_stack_boundary_arg, min);
5246 ix86_user_incoming_stack_boundary
5247 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
5248 ix86_incoming_stack_boundary
5249 = ix86_user_incoming_stack_boundary;
5253 #ifndef NO_PROFILE_COUNTERS
5254 if (flag_nop_mcount)
5255 error ("-mnop-mcount is not compatible with this target");
5257 if (flag_nop_mcount && flag_pic)
5258 error ("-mnop-mcount is not implemented for -fPIC");
5260 /* Accept -msseregparm only if at least SSE support is enabled. */
5261 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
5262 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
5263 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
5265 if (opts_set->x_ix86_fpmath)
5267 if (opts->x_ix86_fpmath & FPMATH_SSE)
5269 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
5271 if (TARGET_80387_P (opts->x_target_flags))
5273 warning (0, "SSE instruction set disabled, using 387 arithmetics");
5274 opts->x_ix86_fpmath = FPMATH_387;
5277 else if ((opts->x_ix86_fpmath & FPMATH_387)
5278 && !TARGET_80387_P (opts->x_target_flags))
5280 warning (0, "387 instruction set disabled, using SSE arithmetics");
5281 opts->x_ix86_fpmath = FPMATH_SSE;
5285 /* For all chips supporting SSE2, -mfpmath=sse performs better than
5286 fpmath=387. The second is however default at many targets since the
5287 extra 80bit precision of temporaries is considered to be part of ABI.
5288 Overwrite the default at least for -ffast-math.
5289 TODO: -mfpmath=both seems to produce same performing code with bit
5290 smaller binaries. It is however not clear if register allocation is
5291 ready for this setting.
5292 Also -mfpmath=387 is overall a lot more compact (bout 4-5%) than SSE
5293 codegen. We may switch to 387 with -ffast-math for size optimized
5295 else if (fast_math_flags_set_p (&global_options)
5296 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
5297 opts->x_ix86_fpmath = FPMATH_SSE;
5299 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
5301 /* Use external vectorized library in vectorizing intrinsics. */
5302 if (opts_set->x_ix86_veclibabi_type)
5303 switch (opts->x_ix86_veclibabi_type)
5305 case ix86_veclibabi_type_svml:
5306 ix86_veclib_handler = ix86_veclibabi_svml;
5309 case ix86_veclibabi_type_acml:
5310 ix86_veclib_handler = ix86_veclibabi_acml;
5317 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
5318 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
5319 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
5321 /* If stack probes are required, the space used for large function
5322 arguments on the stack must also be probed, so enable
5323 -maccumulate-outgoing-args so this happens in the prologue. */
5324 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
5325 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
5327 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
5328 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
5329 "for correctness", prefix, suffix);
5330 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
5333 /* Stack realignment without -maccumulate-outgoing-args requires %ebp,
5334 so enable -maccumulate-outgoing-args when %ebp is fixed. */
5335 if (fixed_regs[BP_REG]
5336 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
5338 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
5339 warning (0, "fixed ebp register requires %saccumulate-outgoing-args%s",
5341 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
5344 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
5347 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
5348 p = strchr (internal_label_prefix, 'X');
5349 internal_label_prefix_len = p - internal_label_prefix;
5353 /* When scheduling description is not available, disable scheduler pass
5354 so it won't slow down the compilation and make x87 code slower. */
5355 if (!TARGET_SCHEDULE)
5356 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
5358 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
5359 ix86_tune_cost->simultaneous_prefetches,
5360 opts->x_param_values,
5361 opts_set->x_param_values);
5362 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
5363 ix86_tune_cost->prefetch_block,
5364 opts->x_param_values,
5365 opts_set->x_param_values);
5366 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
5367 ix86_tune_cost->l1_cache_size,
5368 opts->x_param_values,
5369 opts_set->x_param_values);
5370 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
5371 ix86_tune_cost->l2_cache_size,
5372 opts->x_param_values,
5373 opts_set->x_param_values);
5375 /* Restrict number of if-converted SET insns to 1. */
5376 if (TARGET_ONE_IF_CONV_INSN)
5377 maybe_set_param_value (PARAM_MAX_RTL_IF_CONVERSION_INSNS,
5379 opts->x_param_values,
5380 opts_set->x_param_values);
5382 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
5383 if (opts->x_flag_prefetch_loop_arrays < 0
5385 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
5386 && !opts->x_optimize_size
5387 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
5388 opts->x_flag_prefetch_loop_arrays = 1;
5390 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
5391 can be opts->x_optimized to ap = __builtin_next_arg (0). */
5392 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
5393 targetm.expand_builtin_va_start = NULL;
5395 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5397 ix86_gen_leave = gen_leave_rex64;
5398 if (Pmode == DImode)
5400 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
5401 ix86_gen_tls_local_dynamic_base_64
5402 = gen_tls_local_dynamic_base_64_di;
5406 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
5407 ix86_gen_tls_local_dynamic_base_64
5408 = gen_tls_local_dynamic_base_64_si;
5412 ix86_gen_leave = gen_leave;
5414 if (Pmode == DImode)
5416 ix86_gen_add3 = gen_adddi3;
5417 ix86_gen_sub3 = gen_subdi3;
5418 ix86_gen_sub3_carry = gen_subdi3_carry;
5419 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
5420 ix86_gen_andsp = gen_anddi3;
5421 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
5422 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
5423 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
5424 ix86_gen_monitor = gen_sse3_monitor_di;
5425 ix86_gen_monitorx = gen_monitorx_di;
5426 ix86_gen_clzero = gen_clzero_di;
5430 ix86_gen_add3 = gen_addsi3;
5431 ix86_gen_sub3 = gen_subsi3;
5432 ix86_gen_sub3_carry = gen_subsi3_carry;
5433 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
5434 ix86_gen_andsp = gen_andsi3;
5435 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
5436 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
5437 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
5438 ix86_gen_monitor = gen_sse3_monitor_si;
5439 ix86_gen_monitorx = gen_monitorx_si;
5440 ix86_gen_clzero = gen_clzero_si;
5444 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
5445 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
5446 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
5449 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
5451 if (opts->x_flag_fentry > 0)
5452 sorry ("-mfentry isn%'t supported for 32-bit in combination "
5454 opts->x_flag_fentry = 0;
5456 else if (TARGET_SEH)
5458 if (opts->x_flag_fentry == 0)
5459 sorry ("-mno-fentry isn%'t compatible with SEH");
5460 opts->x_flag_fentry = 1;
5462 else if (opts->x_flag_fentry < 0)
5464 #if defined(PROFILE_BEFORE_PROLOGUE)
5465 opts->x_flag_fentry = 1;
5467 opts->x_flag_fentry = 0;
5471 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
5472 opts->x_target_flags |= MASK_VZEROUPPER;
5473 if (!(opts_set->x_target_flags & MASK_STV))
5474 opts->x_target_flags |= MASK_STV;
5475 /* Disable STV if -mpreferred-stack-boundary={2,3} or
5476 -mincoming-stack-boundary={2,3} - the needed
5477 stack realignment will be extra cost the pass doesn't take into
5478 account and the pass can't realign the stack. */
5479 if (ix86_preferred_stack_boundary < 128
5480 || ix86_incoming_stack_boundary < 128)
5481 opts->x_target_flags &= ~MASK_STV;
5482 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
5483 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
5484 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
5485 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
5486 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
5487 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
5488 /* Enable 128-bit AVX instruction generation
5489 for the auto-vectorizer. */
5490 if (TARGET_AVX128_OPTIMAL
5491 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
5492 opts->x_target_flags |= MASK_PREFER_AVX128;
5494 if (opts->x_ix86_recip_name)
5496 char *p = ASTRDUP (opts->x_ix86_recip_name);
5498 unsigned int mask, i;
5501 while ((q = strtok (p, ",")) != NULL)
5512 if (!strcmp (q, "default"))
5513 mask = RECIP_MASK_ALL;
5516 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
5517 if (!strcmp (q, recip_options[i].string))
5519 mask = recip_options[i].mask;
5523 if (i == ARRAY_SIZE (recip_options))
5525 error ("unknown option for -mrecip=%s", q);
5527 mask = RECIP_MASK_NONE;
5531 opts->x_recip_mask_explicit |= mask;
5533 opts->x_recip_mask &= ~mask;
5535 opts->x_recip_mask |= mask;
5539 if (TARGET_RECIP_P (opts->x_target_flags))
5540 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
5541 else if (opts_set->x_target_flags & MASK_RECIP)
5542 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
5544 /* Default long double to 64-bit for 32-bit Bionic and to __float128
5545 for 64-bit Bionic. Also default long double to 64-bit for Intel
5547 if ((TARGET_HAS_BIONIC || TARGET_IAMCU)
5548 && !(opts_set->x_target_flags
5549 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
5550 opts->x_target_flags |= (TARGET_64BIT
5551 ? MASK_LONG_DOUBLE_128
5552 : MASK_LONG_DOUBLE_64);
5554 /* Only one of them can be active. */
5555 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
5556 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
5558 /* Save the initial options in case the user does function specific
5561 target_option_default_node = target_option_current_node
5562 = build_target_option_node (opts);
5564 /* Handle stack protector */
5565 if (!opts_set->x_ix86_stack_protector_guard)
5566 opts->x_ix86_stack_protector_guard
5567 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
5569 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
5570 if (opts->x_ix86_tune_memcpy_strategy)
5572 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
5573 ix86_parse_stringop_strategy_string (str, false);
5577 if (opts->x_ix86_tune_memset_strategy)
5579 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
5580 ix86_parse_stringop_strategy_string (str, true);
5585 /* Implement the TARGET_OPTION_OVERRIDE hook. */
5588 ix86_option_override (void)
5590 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
5591 struct register_pass_info insert_vzeroupper_info
5592 = { pass_insert_vzeroupper, "reload",
5593 1, PASS_POS_INSERT_AFTER
5595 opt_pass *pass_stv = make_pass_stv (g);
5596 struct register_pass_info stv_info
5597 = { pass_stv, "combine",
5598 1, PASS_POS_INSERT_AFTER
5601 ix86_option_override_internal (true, &global_options, &global_options_set);
5604 /* This needs to be done at start up. It's convenient to do it here. */
5605 register_pass (&insert_vzeroupper_info);
5606 register_pass (&stv_info);
5609 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
5611 ix86_offload_options (void)
5614 return xstrdup ("-foffload-abi=lp64");
5615 return xstrdup ("-foffload-abi=ilp32");
5618 /* Update register usage after having seen the compiler flags. */
/* Implements the CONDITIONAL_REGISTER_USAGE logic: for each register class
   whose ISA is disabled by the selected flags, mark its registers as
   fixed + call_used and blank reg_names[] so the register allocator never
   uses them and they are never printed.
   NOTE(review): the guard conditions for several stanzas below (e.g. the
   !TARGET_64BIT check before squashing REX regs, !TARGET_MMX / !TARGET_SSE
   checks, and the MPX check) are elided in this extraction.  */
5621 ix86_conditional_register_usage (void)
5625 /* For 32-bit targets, squash the REX registers. */
5628 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
5629 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5630 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
5631 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5632 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
5633 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5636 /* See the definition of CALL_USED_REGISTERS in i386.h. */
5637 c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
5639 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
5641 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5643 /* Set/reset conditionally defined registers from
5644 CALL_USED_REGISTERS initializer. */
/* Entries > 1 in CALL_USED_REGISTERS encode ABI-conditional registers;
   reduce them to 0/1 using the ABI mask computed above.  */
5645 if (call_used_regs[i] > 1)
5646 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
5648 /* Calculate registers of CLOBBERED_REGS register set
5649 as call used registers from GENERAL_REGS register set. */
5650 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
5651 && call_used_regs[i])
5652 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
5655 /* If MMX is disabled, squash the registers. */
5657 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5658 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
5659 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5661 /* If SSE is disabled, squash the registers. */
5663 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5664 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
5665 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5667 /* If the FPU is disabled, squash the registers. */
5668 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
5669 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5670 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
5671 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5673 /* If AVX512F is disabled, squash the registers. */
5674 if (! TARGET_AVX512F)
5676 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
5677 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
/* Mask registers (k0-k7) also require AVX512F.  */
5679 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
5680 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5683 /* If MPX is disabled, squash the registers. */
5685 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
5686 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5690 /* Save the current options */
/* Implements TARGET_OPTION_SAVE: copies the i386 target-specific global
   state and the x_* fields of OPTS into PTR so that attribute((target))
   and #pragma GCC target can later restore them with
   ix86_function_specific_restore.  Field-for-field copy; the trailing
   asserts guard against narrowing, since the struct fields are chars
   while the globals are wider.  */
5693 ix86_function_specific_save (struct cl_target_option *ptr,
5694 struct gcc_options *opts)
5696 ptr->arch = ix86_arch;
5697 ptr->schedule = ix86_schedule;
5698 ptr->prefetch_sse = x86_prefetch_sse;
5699 ptr->tune = ix86_tune;
5700 ptr->branch_cost = ix86_branch_cost;
5701 ptr->tune_defaulted = ix86_tune_defaulted;
5702 ptr->arch_specified = ix86_arch_specified;
5703 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
5704 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
5705 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
5706 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
5707 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
5708 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
5709 ptr->x_ix86_abi = opts->x_ix86_abi;
5710 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
5711 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
5712 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
5713 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
5714 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
5715 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
5716 ptr->x_ix86_pmode = opts->x_ix86_pmode;
5717 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
5718 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
5719 ptr->x_ix86_regparm = opts->x_ix86_regparm;
5720 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
5721 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
5722 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
5723 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
5724 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
5725 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
5726 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
5727 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
5728 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
5729 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
5731 /* The fields are char but the variables are not; make sure the
5732 values fit in the fields. */
5733 gcc_assert (ptr->arch == ix86_arch);
5734 gcc_assert (ptr->schedule == ix86_schedule);
5735 gcc_assert (ptr->tune == ix86_tune);
5736 gcc_assert (ptr->branch_cost == ix86_branch_cost);
5739 /* Restore the current options */
/* Implements TARGET_OPTION_RESTORE: the inverse of
   ix86_function_specific_save.  Copies PTR back into the globals and OPTS,
   then rebuilds derived state (cost tables, arch feature bits, tune
   features) when arch/tune actually changed, to avoid redundant work.  */
5742 ix86_function_specific_restore (struct gcc_options *opts,
5743 struct cl_target_option *ptr)
5745 enum processor_type old_tune = ix86_tune;
5746 enum processor_type old_arch = ix86_arch;
5747 unsigned int ix86_arch_mask;
5750 /* We don't change -fPIC. */
5751 opts->x_flag_pic = flag_pic;
5753 ix86_arch = (enum processor_type) ptr->arch;
5754 ix86_schedule = (enum attr_cpu) ptr->schedule;
5755 ix86_tune = (enum processor_type) ptr->tune;
5756 x86_prefetch_sse = ptr->prefetch_sse;
5757 opts->x_ix86_branch_cost = ptr->branch_cost;
5758 ix86_tune_defaulted = ptr->tune_defaulted;
5759 ix86_arch_specified = ptr->arch_specified;
5760 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
5761 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
5762 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
5763 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
5764 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
5765 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
5766 opts->x_ix86_abi = ptr->x_ix86_abi;
5767 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
5768 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
5769 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
5770 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
5771 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
5772 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
5773 opts->x_ix86_pmode = ptr->x_ix86_pmode;
5774 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
5775 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
5776 opts->x_ix86_regparm = ptr->x_ix86_regparm;
5777 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
5778 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
5779 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
5780 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
5781 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
5782 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
5783 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
5784 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
5785 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
5786 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
5787 ix86_tune_cost = processor_target_table[ix86_tune].cost;
5788 /* TODO: ix86_cost should be chosen at instruction or function granuality
5789 so for cold code we use size_cost even in !optimize_size compilation. */
5790 if (opts->x_optimize_size)
5791 ix86_cost = &ix86_size_cost;
5793 ix86_cost = ix86_tune_cost;
5795 /* Recreate the arch feature tests if the arch changed */
5796 if (old_arch != ix86_arch)
5798 ix86_arch_mask = 1u << ix86_arch;
5799 for (i = 0; i < X86_ARCH_LAST; ++i)
5800 ix86_arch_features[i]
5801 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
5804 /* Recreate the tune optimization tests */
5805 if (old_tune != ix86_tune)
5806 set_ix86_tune_features (ix86_tune, false);
5809 /* Adjust target options after streaming them in. This is mainly about
5810 reconciling them with global options. */
/* Implements TARGET_OPTION_POST_STREAM_IN (used by LTO): streamed-in code
   models must be reconciled with the reader's -fPIC setting.
   NOTE(review): the switch case labels (CM_SMALL, CM_MEDIUM, CM_LARGE,
   CM_KERNEL, ...) and the flag_pic guards are elided in this extraction;
   the visible assignments map each non-PIC model to its PIC counterpart
   (first switch, when PIC is on) or back (second switch).  */
5813 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
5815 /* flag_pic is a global option, but ix86_cmodel is target saved option
5816 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
5817 for PIC, or error out. */
5819 switch (ptr->x_ix86_cmodel)
5822 ptr->x_ix86_cmodel = CM_SMALL_PIC;
5826 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
5830 ptr->x_ix86_cmodel = CM_LARGE_PIC;
/* The kernel code model has no PIC variant; reject it outright.  */
5834 error ("code model %s does not support PIC mode", "kernel");
5841 switch (ptr->x_ix86_cmodel)
5844 ptr->x_ix86_cmodel = CM_SMALL;
5848 ptr->x_ix86_cmodel = CM_MEDIUM;
5852 ptr->x_ix86_cmodel = CM_LARGE;
5860 /* Print the current options */
/* Implements TARGET_OPTION_PRINT: dumps arch/tune/branch-cost and the
   reconstructed option string from PTR to FILE at the given INDENT,
   for -fdump-* style debug output.  ix86_target_string allocates the
   string, so it is freed after printing.  */
5863 ix86_function_specific_print (FILE *file, int indent,
5864 struct cl_target_option *ptr)
5867 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
5868 NULL, NULL, ptr->x_ix86_fpmath, false);
/* arch/tune are chars; assert they index processor_target_table safely.  */
5870 gcc_assert (ptr->arch < PROCESSOR_max);
5871 fprintf (file, "%*sarch = %d (%s)\n",
5873 ptr->arch, processor_target_table[ptr->arch].name);
5875 gcc_assert (ptr->tune < PROCESSOR_max);
5876 fprintf (file, "%*stune = %d (%s)\n",
5878 ptr->tune, processor_target_table[ptr->tune].name);
5880 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
5884 fprintf (file, "%*s%s\n", indent, "", target_string);
5885 free (target_string);
5890 /* Inner function to process the attribute((target(...))), take an argument and
5891 set the current options from the argument. If we have a list, recursively go
/* Parses attribute((target("opt1,opt2,..."))) arguments.  ARGS is either a
   TREE_LIST (recursed over) or a STRING_CST holding comma-separated option
   names.  Recognized options update OPTS/OPTS_SET/ENUM_OPTS_SET; "arch="
   and "tune=" strings are xstrdup'ed into P_STRINGS (caller frees).
   Returns true on success, false (after error()) on any unknown or
   malformed option.  */
5895 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
5896 struct gcc_options *opts,
5897 struct gcc_options *opts_set,
5898 struct gcc_options *enum_opts_set)
/* Table-entry constructors: string, its length, dispatch type, option
   enum, and (for yes/no entries) the target_flags mask.  */
5903 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
5904 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
5905 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
5906 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
5907 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
5923 enum ix86_opt_type type;
/* ISA options: each maps directly to the corresponding -m<isa> switch.  */
5928 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
5929 IX86_ATTR_ISA ("abm", OPT_mabm),
5930 IX86_ATTR_ISA ("bmi", OPT_mbmi),
5931 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
5932 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
5933 IX86_ATTR_ISA ("tbm", OPT_mtbm),
5934 IX86_ATTR_ISA ("aes", OPT_maes),
5935 IX86_ATTR_ISA ("sha", OPT_msha),
5936 IX86_ATTR_ISA ("avx", OPT_mavx),
5937 IX86_ATTR_ISA ("avx2", OPT_mavx2),
5938 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
5939 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
5940 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
5941 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
5942 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
5943 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
5944 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
5945 IX86_ATTR_ISA ("mmx", OPT_mmmx),
5946 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
5947 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
5948 IX86_ATTR_ISA ("sse", OPT_msse),
5949 IX86_ATTR_ISA ("sse2", OPT_msse2),
5950 IX86_ATTR_ISA ("sse3", OPT_msse3),
5951 IX86_ATTR_ISA ("sse4", OPT_msse4),
5952 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
5953 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
5954 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
5955 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
5956 IX86_ATTR_ISA ("fma4", OPT_mfma4),
5957 IX86_ATTR_ISA ("fma", OPT_mfma),
5958 IX86_ATTR_ISA ("xop", OPT_mxop),
5959 IX86_ATTR_ISA ("lwp", OPT_mlwp),
5960 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
5961 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
5962 IX86_ATTR_ISA ("f16c", OPT_mf16c),
5963 IX86_ATTR_ISA ("rtm", OPT_mrtm),
5964 IX86_ATTR_ISA ("hle", OPT_mhle),
5965 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
5966 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
5967 IX86_ATTR_ISA ("adx", OPT_madx),
5968 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
5969 IX86_ATTR_ISA ("xsave", OPT_mxsave),
5970 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
5971 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
5972 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
5973 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
5974 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
5975 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
5976 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
5977 IX86_ATTR_ISA ("clwb", OPT_mclwb),
5978 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
5979 IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx),
5980 IX86_ATTR_ISA ("clzero", OPT_mclzero),
5981 IX86_ATTR_ISA ("pku", OPT_mpku),
/* enum options */
5984 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
5986 /* string options */
5987 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
5988 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
/* flag options (set/clear a MASK_* bit in target_flags) */
5991 IX86_ATTR_YES ("cld",
5995 IX86_ATTR_NO ("fancy-math-387",
5996 OPT_mfancy_math_387,
5997 MASK_NO_FANCY_MATH_387),
5999 IX86_ATTR_YES ("ieee-fp",
6003 IX86_ATTR_YES ("inline-all-stringops",
6004 OPT_minline_all_stringops,
6005 MASK_INLINE_ALL_STRINGOPS),
6007 IX86_ATTR_YES ("inline-stringops-dynamically",
6008 OPT_minline_stringops_dynamically,
6009 MASK_INLINE_STRINGOPS_DYNAMICALLY),
6011 IX86_ATTR_NO ("align-stringops",
6012 OPT_mno_align_stringops,
6013 MASK_NO_ALIGN_STRINGOPS),
6015 IX86_ATTR_YES ("recip",
6021 /* If this is a list, recurse to get the options. */
6022 if (TREE_CODE (args) == TREE_LIST)
6026 for (; args; args = TREE_CHAIN (args))
6027 if (TREE_VALUE (args)
6028 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
6029 p_strings, opts, opts_set,
6036 else if (TREE_CODE (args) != STRING_CST)
6038 error ("attribute %<target%> argument not a string");
6042 /* Handle multiple arguments separated by commas. */
/* ASTRDUP allocates on the obstack/alloca, so the working copy needs no
   explicit free; we destructively walk it comma by comma.  */
6043 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
6045 while (next_optstr && *next_optstr != '\0')
6047 char *p = next_optstr;
6049 char *comma = strchr (next_optstr, ',');
6050 const char *opt_string;
6051 size_t len, opt_len;
6056 enum ix86_opt_type type = ix86_opt_unknown;
6062 len = comma - next_optstr;
6063 next_optstr = comma + 1;
6071 /* Recognize no-xxx. */
6072 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
6081 /* Find the option. */
/* Linear scan of the table; first-character check is a cheap filter
   before the full memcmp.  */
6084 for (i = 0; i < ARRAY_SIZE (attrs); i++)
6086 type = attrs[i].type;
6087 opt_len = attrs[i].len;
6088 if (ch == attrs[i].string[0]
6089 && ((type != ix86_opt_str && type != ix86_opt_enum)
6092 && memcmp (p, attrs[i].string, opt_len) == 0)
6095 mask = attrs[i].mask;
6096 opt_string = attrs[i].string;
6101 /* Process the option. */
6104 error ("attribute(target(\"%s\")) is unknown", orig_p);
6108 else if (type == ix86_opt_isa)
6110 struct cl_decoded_option decoded;
/* Route ISA options through the regular option machinery so implied
   ISA bits and the explicit-flags bookkeeping stay consistent.  */
6112 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
6113 ix86_handle_option (opts, opts_set,
6114 &decoded, input_location);
6117 else if (type == ix86_opt_yes || type == ix86_opt_no)
6119 if (type == ix86_opt_no)
6120 opt_set_p = !opt_set_p;
6123 opts->x_target_flags |= mask;
6125 opts->x_target_flags &= ~mask;
6128 else if (type == ix86_opt_str)
/* arch=/tune= may each appear only once per attribute.  */
6132 error ("option(\"%s\") was already specified", opt_string);
6136 p_strings[opt] = xstrdup (p + opt_len);
6139 else if (type == ix86_opt_enum)
6144 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
6146 set_option (opts, enum_opts_set, opt, value,
6147 p + opt_len, DK_UNSPECIFIED, input_location,
6151 error ("attribute(target(\"%s\")) is unknown", orig_p);
6163 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
/* Parses ARGS via ix86_valid_target_attribute_inner_p, and if the result
   differs from the compilation default, reruns the full option override
   and builds a cl_target_option tree node capturing the state.  Returns
   error_mark_node on parse failure.  The original arch/tune/fpmath
   settings are restored before returning so global option state is not
   perturbed; the temporary option strings are freed at the end.  */
6166 ix86_valid_target_attribute_tree (tree args,
6167 struct gcc_options *opts,
6168 struct gcc_options *opts_set)
6170 const char *orig_arch_string = opts->x_ix86_arch_string;
6171 const char *orig_tune_string = opts->x_ix86_tune_string;
6172 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
6173 int orig_tune_defaulted = ix86_tune_defaulted;
6174 int orig_arch_specified = ix86_arch_specified;
6175 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
6178 struct cl_target_option *def
6179 = TREE_TARGET_OPTION (target_option_default_node);
6180 struct gcc_options enum_opts_set;
6182 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
6184 /* Process each of the options on the chain. */
6185 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
6186 opts_set, &enum_opts_set))
6187 return error_mark_node;
6189 /* If the changed options are different from the default, rerun
6190 ix86_option_override_internal, and then save the options away.
6191 The string options are attribute options, and will be undone
6192 when we copy the save structure. */
6193 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
6194 || opts->x_target_flags != def->x_target_flags
6195 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
6196 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
6197 || enum_opts_set.x_ix86_fpmath)
6199 /* If we are using the default tune= or arch=, undo the string assigned,
6200 and use the default. */
6201 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
6203 opts->x_ix86_arch_string
6204 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
6206 /* If arch= is set, clear all bits in x_ix86_isa_flags,
6207 except for ISA_64BIT, ABI_64, ABI_X32, and CODE16. */
6208 opts->x_ix86_isa_flags &= (OPTION_MASK_ISA_64BIT
6209 | OPTION_MASK_ABI_64
6210 | OPTION_MASK_ABI_X32
6211 | OPTION_MASK_CODE16);
6214 else if (!orig_arch_specified)
6215 opts->x_ix86_arch_string = NULL;
6217 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
6218 opts->x_ix86_tune_string
6219 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]);
6220 else if (orig_tune_defaulted)
6221 opts->x_ix86_tune_string = NULL;
6223 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
6224 if (enum_opts_set.x_ix86_fpmath)
6225 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
6226 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
6227 && TARGET_SSE_P (opts->x_ix86_isa_flags))
6229 if (TARGET_80387_P (opts->x_target_flags))
6230 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE
6233 opts->x_ix86_fpmath = (enum fpmath_unit) FPMATH_SSE;
6234 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
6237 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
6238 ix86_option_override_internal (false, opts, opts_set);
6240 /* Add any builtin functions with the new isa if any. */
6241 ix86_add_new_builtins (opts->x_ix86_isa_flags);
6243 /* Save the current options unless we are validating options for
6245 t = build_target_option_node (opts);
6247 opts->x_ix86_arch_string = orig_arch_string;
6248 opts->x_ix86_tune_string = orig_tune_string;
6249 opts_set->x_ix86_fpmath = orig_fpmath_set;
6251 /* Free up memory allocated to hold the strings */
6252 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
6253 free (option_strings[i]);
6259 /* Hook to validate attribute((target("string"))). */
/* Implements TARGET_OPTION_VALID_ATTRIBUTE_P.  Builds a scratch
   gcc_options initialized from the function's optimization node and the
   compilation-default target node, parses the attribute into it, and on
   success records the resulting target (and, if changed, optimization)
   nodes on FNDECL.  Returns false when the attribute tree is
   error_mark_node.  */
6262 ix86_valid_target_attribute_p (tree fndecl,
6263 tree ARG_UNUSED (name),
6265 int ARG_UNUSED (flags))
6267 struct gcc_options func_options;
6268 tree new_target, new_optimize;
6271 /* attribute((target("default"))) does nothing, beyond
6272 affecting multi-versioning. */
6273 if (TREE_VALUE (args)
6274 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
6275 && TREE_CHAIN (args) == NULL_TREE
6276 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
6279 tree old_optimize = build_optimization_node (&global_options);
6281 /* Get the optimization options of the current function. */
6282 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
6285 func_optimize = old_optimize;
6287 /* Init func_options. */
6288 memset (&func_options, 0, sizeof (func_options));
6289 init_options_struct (&func_options, NULL);
6290 lang_hooks.init_options_struct (&func_options);
6292 cl_optimization_restore (&func_options,
6293 TREE_OPTIMIZATION (func_optimize));
6295 /* Initialize func_options to the default before its target options can
6297 cl_target_option_restore (&func_options,
6298 TREE_TARGET_OPTION (target_option_default_node));
6300 new_target = ix86_valid_target_attribute_tree (args, &func_options,
6301 &global_options_set);
6303 new_optimize = build_optimization_node (&func_options);
6305 if (new_target == error_mark_node)
6308 else if (fndecl && new_target)
6310 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
6312 if (old_optimize != new_optimize)
6313 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* Release any heap storage owned by the scratch options struct.  */
6316 finalize_options_struct (&func_options);
6322 /* Hook to determine if one function can safely inline another. */
/* Implements TARGET_CAN_INLINE_P.  Inlining is allowed when the callee has
   no target attributes, or when the callee's ISA flags are a subset of the
   caller's and the remaining target options (flags, arch, tune, fpmath,
   branch cost) match exactly.  */
6325 ix86_can_inline_p (tree caller, tree callee)
6328 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
6329 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
6331 /* If callee has no option attributes, then it is ok to inline. */
6335 /* If caller has no option attributes, but callee does then it is not ok to
6337 else if (!caller_tree)
6342 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
6343 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
6345 /* Callee's isa options should a subset of the caller's, i.e. a SSE4 function
6346 can inline a SSE2 function but a SSE2 function can't inline a SSE4
6348 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
6349 != callee_opts->x_ix86_isa_flags)
6352 /* See if we have the same non-isa options. */
6353 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
6356 /* See if arch, tune, etc. are the same. */
6357 else if (caller_opts->arch != callee_opts->arch)
6360 else if (caller_opts->tune != callee_opts->tune)
6363 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
6366 else if (caller_opts->branch_cost != callee_opts->branch_cost)
6377 /* Remember the last target of ix86_set_current_function. */
/* GC-rooted cache that lets ix86_set_current_function skip reinitialization
   when called repeatedly for the same function.  */
6378 static GTY(()) tree ix86_previous_fndecl;
6380 /* Set targets globals to the default (or current #pragma GCC target
6381 if active). Invalidate ix86_previous_fndecl cache. */
6384 ix86_reset_previous_fndecl (void)
6386 tree new_tree = target_option_current_node;
6387 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6388 if (TREE_TARGET_GLOBALS (new_tree))
6389 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
6390 else if (new_tree == target_option_default_node)
6391 restore_target_globals (&default_target_globals);
/* First use of a non-default node: build and cache its globals.  */
6393 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
6394 ix86_previous_fndecl = NULL_TREE;
6397 /* Establish appropriate back-end context for processing the function
6398 FNDECL. The argument might be NULL to indicate processing at top
6399 level, outside of any function scope. */
/* Implements TARGET_SET_CURRENT_FUNCTION.  Restores the target globals
   matching FNDECL's target attribute (or the default), caching the last
   fndecl to avoid repeated expensive restores.  */
6401 ix86_set_current_function (tree fndecl)
6403 /* Only change the context if the function changes. This hook is called
6404 several times in the course of compiling a function, and we don't want to
6405 slow things down too much or call target_reinit when it isn't safe. */
6406 if (fndecl == ix86_previous_fndecl)
6410 if (ix86_previous_fndecl == NULL_TREE)
6411 old_tree = target_option_current_node;
6412 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
6413 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
6415 old_tree = target_option_default_node;
6417 if (fndecl == NULL_TREE)
6419 if (old_tree != target_option_current_node)
6420 ix86_reset_previous_fndecl ();
6424 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
6425 if (new_tree == NULL_TREE)
6426 new_tree = target_option_default_node;
6428 if (old_tree != new_tree)
6430 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6431 if (TREE_TARGET_GLOBALS (new_tree))
6432 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
6433 else if (new_tree == target_option_default_node)
6434 restore_target_globals (&default_target_globals);
6436 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
6438 ix86_previous_fndecl = fndecl;
6440 /* 64-bit MS and SYSV ABI have different set of call used registers.
6441 Avoid expensive re-initialization of init_regs each time we switch
6442 function context. */
/* call_used_regs[SI_REG] differs between the two 64-bit ABIs, so it serves
   as a cheap "which ABI are the tables set up for" probe.  */
6444 && (call_used_regs[SI_REG]
6445 == (cfun->machine->call_abi == MS_ABI)))
6450 /* Return true if this goes in large data/bss. */
/* Only relevant for the x86-64 medium code model, where objects larger than
   -mlarge-data-threshold (ix86_section_threshold) live in .ldata/.lbss.
   Unknown-size objects (int_size_in_bytes == -1) and size-0 incompletes are
   conservatively treated as large.  */
6453 ix86_in_large_data_p (tree exp)
6455 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
6458 /* Functions are never large data. */
6459 if (TREE_CODE (exp) == FUNCTION_DECL)
6462 /* Automatic variables are never large data. */
6463 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
/* An explicit section placement into .ldata/.lbss counts as large.  */
6466 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
6468 const char *section = DECL_SECTION_NAME (exp);
6469 if (strcmp (section, ".ldata") == 0
6470 || strcmp (section, ".lbss") == 0)
6476 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
6478 /* If this is an incomplete type with size 0, then we can't put it
6479 in data because it might be too big when completed. Also,
6480 int_size_in_bytes returns -1 if size can vary or is larger than
6481 an integer in which case also it is safer to assume that it goes in
6483 if (size <= 0 || size > ix86_section_threshold)
6490 /* Switch to the appropriate section for output of DECL.
6491 DECL is either a `VAR_DECL' node or a constant of some sort.
6492 RELOC indicates whether forming the initial value of DECL requires
6493 link-time relocations. */
/* Implements TARGET_ASM_SELECT_SECTION for x86-64 ELF: large-model data
   is redirected to .ldata*/.lbss variants of the normal sections;
   everything else falls through to the generic ELF selection.  */
6495 ATTRIBUTE_UNUSED static section *
6496 x86_64_elf_select_section (tree decl, int reloc,
6497 unsigned HOST_WIDE_INT align)
6499 if (ix86_in_large_data_p (decl))
6501 const char *sname = NULL;
6502 unsigned int flags = SECTION_WRITE;
6503 switch (categorize_decl_for_section (decl, reloc))
6508 case SECCAT_DATA_REL:
6509 sname = ".ldata.rel";
6511 case SECCAT_DATA_REL_LOCAL:
6512 sname = ".ldata.rel.local";
6514 case SECCAT_DATA_REL_RO:
6515 sname = ".ldata.rel.ro";
6517 case SECCAT_DATA_REL_RO_LOCAL:
6518 sname = ".ldata.rel.ro.local";
6522 flags |= SECTION_BSS;
6525 case SECCAT_RODATA_MERGE_STR:
6526 case SECCAT_RODATA_MERGE_STR_INIT:
6527 case SECCAT_RODATA_MERGE_CONST:
6531 case SECCAT_SRODATA:
6538 /* We don't split these for medium model. Place them into
6539 default sections and hope for best. */
6544 /* We might get called with string constants, but get_named_section
6545 doesn't like them as they are not DECLs. Also, we need to set
6546 flags in that case. */
6548 return get_section (sname, flags, NULL);
6549 return get_named_section (decl, sname, reloc);
6552 return default_elf_select_section (decl, reloc, align);
6555 /* Select a set of attributes for section NAME based on the properties
6556 of DECL and whether or not RELOC indicates that DECL's initializer
6557 might contain runtime relocations. */
/* Implements TARGET_SECTION_TYPE_FLAGS: augments the default flags with
   RELRO for the large-model read-only-after-reloc sections and BSS for the
   .lbss family.
   NOTE(review): the strncmp lengths look one short of the literals --
   ".lbss." is 6 chars but only 5 are compared, and ".gnu.linkonce.lb." is
   17 chars but only 16 are compared.  Harmless in practice for the ".lbss"
   prefix, but worth confirming against upstream intent before changing.  */
6559 static unsigned int ATTRIBUTE_UNUSED
6560 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
6562 unsigned int flags = default_section_type_flags (decl, name, reloc);
6564 if (decl == NULL_TREE
6565 && (strcmp (name, ".ldata.rel.ro") == 0
6566 || strcmp (name, ".ldata.rel.ro.local") == 0))
6567 flags |= SECTION_RELRO;
6569 if (strcmp (name, ".lbss") == 0
6570 || strncmp (name, ".lbss.", 5) == 0
6571 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
6572 flags |= SECTION_BSS;
6577 /* Build up a unique section name, expressed as a
6578 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
6579 RELOC indicates whether the initial value of EXP requires
6580 link-time relocations. */
/* Implements TARGET_ASM_UNIQUE_SECTION for x86-64 ELF: large-model data
   gets per-decl sections prefixed with the .l* large-section names
   (plus .gnu.linkonce when COMDAT groups are unavailable); everything
   else uses the generic implementation.  */
6582 static void ATTRIBUTE_UNUSED
6583 x86_64_elf_unique_section (tree decl, int reloc)
6585 if (ix86_in_large_data_p (decl))
6587 const char *prefix = NULL;
6588 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
6589 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
6591 switch (categorize_decl_for_section (decl, reloc))
6594 case SECCAT_DATA_REL:
6595 case SECCAT_DATA_REL_LOCAL:
6596 case SECCAT_DATA_REL_RO:
6597 case SECCAT_DATA_REL_RO_LOCAL:
6598 prefix = one_only ? ".ld" : ".ldata";
6601 prefix = one_only ? ".lb" : ".lbss";
6604 case SECCAT_RODATA_MERGE_STR:
6605 case SECCAT_RODATA_MERGE_STR_INIT:
6606 case SECCAT_RODATA_MERGE_CONST:
6607 prefix = one_only ? ".lr" : ".lrodata";
6609 case SECCAT_SRODATA:
6616 /* We don't split these for medium model. Place them into
6617 default sections and hope for best. */
6622 const char *name, *linkonce;
6625 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
6626 name = targetm.strip_name_encoding (name);
6628 /* If we're using one_only, then there needs to be a .gnu.linkonce
6629 prefix to the section name. */
6630 linkonce = one_only ? ".gnu.linkonce" : "";
6632 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
6634 set_decl_section_name (decl, string);
6638 default_unique_section (decl, reloc);
6641 #ifdef COMMON_ASM_OP
6642 /* This says how to output assembler code to declare an
6643 uninitialized external linkage data object.
6645 For medium model x86-64 we need to use .largecomm opcode for
/* Emits either ".largecomm" (medium code model, object above the
   -mlarge-data-threshold) or the normal COMMON_ASM_OP, followed by
   "name,size,byte-alignment".  ALIGN arrives in bits, hence the
   division by BITS_PER_UNIT.  */
6648 x86_elf_aligned_common (FILE *file,
6649 const char *name, unsigned HOST_WIDE_INT size,
6652 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
6653 && size > (unsigned int)ix86_section_threshold)
6654 fputs ("\t.largecomm\t", file);
6656 fputs (COMMON_ASM_OP, file);
6657 assemble_name (file, name);
6658 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
6659 size, align / BITS_PER_UNIT);
6663 /* Utility function for targets to use in implementing
6664 ASM_OUTPUT_ALIGNED_BSS. */
/* Large medium-model objects are placed in .lbss instead of the normal
   bss section; alignment is emitted in log2 bytes, and ASM_OUTPUT_SKIP
   reserves at least one byte so zero-sized objects get distinct
   addresses.  */
6667 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
6668 unsigned HOST_WIDE_INT size, int align)
6670 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
6671 && size > (unsigned int)ix86_section_threshold)
6672 switch_to_section (get_named_section (decl, ".lbss", 0));
6674 switch_to_section (bss_section);
6675 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
6676 #ifdef ASM_DECLARE_OBJECT_NAME
6677 last_assemble_variable_decl = decl;
6678 ASM_DECLARE_OBJECT_NAME (file, name, decl);
6680 /* Standard thing is just output label for the object. */
6681 ASM_OUTPUT_LABEL (file, name);
6682 #endif /* ASM_DECLARE_OBJECT_NAME */
6683 ASM_OUTPUT_SKIP (file, size ? size : 1);
6686 /* Decide whether we must probe the stack before any space allocation
6687 on this target. It's essentially TARGET_STACK_PROBE except when
6688 -fstack-check causes the stack to be already probed differently. */
/* Returns false when -fstack-check's static builtin checking already
   probes the stack (avoiding a double probe); otherwise defers to the
   TARGET_STACK_PROBE setting.  */
6691 ix86_target_stack_probe (void)
6693 /* Do not probe the stack twice if static stack checking is enabled. */
6694 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
6697 return TARGET_STACK_PROBE;
6700 /* Decide whether we can make a sibling call to a function. DECL is the
6701 declaration of the function being targeted by the call and EXP is the
6702 CALL_EXPR representing the call. */
/* Implements TARGET_FUNCTION_OK_FOR_SIBCALL.  Rejects sibcalls when: the
   outgoing stack must be realigned; return-value locations differ
   (especially x87 stack-register returns); an MS-ABI function targets a
   SYSV-ABI callee; or an indirect/GOT/DLLIMPORT call would need a
   call-clobbered address register that regparm>=3 has consumed.  */
6705 ix86_function_ok_for_sibcall (tree decl, tree exp)
6707 tree type, decl_or_type;
6709 bool bind_global = decl && !targetm.binds_local_p (decl);
6711 /* If we are generating position-independent code, we cannot sibcall
6712 optimize direct calls to global functions, as the PLT requires
6713 %ebx be live. (Darwin does not have a PLT.) */
6721 /* If we need to align the outgoing stack, then sibcalling would
6722 unalign the stack, which may break the called function. */
6723 if (ix86_minimum_incoming_stack_boundary (true)
6724 < PREFERRED_STACK_BOUNDARY)
6729 decl_or_type = decl;
6730 type = TREE_TYPE (decl);
6734 /* We're looking at the CALL_EXPR, we need the type of the function. */
6735 type = CALL_EXPR_FN (exp); /* pointer expression */
6736 type = TREE_TYPE (type); /* pointer type */
6737 type = TREE_TYPE (type); /* function type */
6738 decl_or_type = type;
6741 /* Check that the return value locations are the same. Like
6742 if we are returning floats on the 80387 register stack, we cannot
6743 make a sibcall from a function that doesn't return a float to a
6744 function that does or, conversely, from a function that does return
6745 a float to a function that doesn't; the necessary stack adjustment
6746 would not be executed. This is also the place we notice
6747 differences in the return value ABI. Note that it is ok for one
6748 of the functions to have void return type as long as the return
6749 value of the other is passed in a register. */
6750 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
6751 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6753 if (STACK_REG_P (a) || STACK_REG_P (b))
6755 if (!rtx_equal_p (a, b))
6758 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6760 else if (!rtx_equal_p (a, b))
6765 /* The SYSV ABI has more call-clobbered registers;
6766 disallow sibcalls from MS to SYSV. */
6767 if (cfun->machine->call_abi == MS_ABI
6768 && ix86_function_type_abi (type) == SYSV_ABI)
6773 /* If this call is indirect, we'll need to be able to use a
6774 call-clobbered register for the address of the target function.
6775 Make sure that all such registers are not used for passing
6776 parameters. Note that DLLIMPORT functions and call to global
6777 function via GOT slot are indirect. */
6779 || (bind_global && flag_pic && !flag_plt)
6780 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
6782 /* Check if regparm >= 3 since arg_reg_available is set to
6783 false if regparm == 0. If regparm is 1 or 2, there is
6784 always a call-clobbered register available.
6786 ??? The symbol indirect call doesn't need a call-clobbered
6787 register. But we don't know if this is a symbol indirect
6788 call or not here. */
6789 if (ix86_function_regparm (type, NULL) >= 3
6790 && !cfun->machine->arg_reg_available)
6795 /* Otherwise okay. That also includes certain types of indirect calls. */
6799 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
6800 and "sseregparm" calling convention attributes;
6801 arguments as in struct attribute_spec.handler. */
/* NOTE(review): this listing is elided — declaration/brace lines are
   missing between the numbered lines below; code is kept verbatim.  */
6804 ix86_handle_cconv_attribute (tree *node, tree name,
/* Reject the attribute outright on anything that is not a function,
   method, field or type declaration.  */
6809 if (TREE_CODE (*node) != FUNCTION_TYPE
6810 && TREE_CODE (*node) != METHOD_TYPE
6811 && TREE_CODE (*node) != FIELD_DECL
6812 && TREE_CODE (*node) != TYPE_DECL)
6814 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6816 *no_add_attrs = true;
6820 /* Can combine regparm with all attributes but fastcall, and thiscall. */
6821 if (is_attribute_p ("regparm", name))
6825 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6827 error ("fastcall and regparm attributes are not compatible");
6830 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
/* NOTE(review): "regparam" in the diagnostic below looks like a typo
   for "regparm" — cannot be changed here without altering a runtime
   string; flag for a follow-up fix.  */
6832 error ("regparam and thiscall attributes are not compatible");
/* Validate the regparm argument: must be an integer constant no
   larger than REGPARM_MAX.  */
6835 cst = TREE_VALUE (args);
6836 if (TREE_CODE (cst) != INTEGER_CST)
6838 warning (OPT_Wattributes,
6839 "%qE attribute requires an integer constant argument",
6841 *no_add_attrs = true;
6843 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
6845 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
6847 *no_add_attrs = true;
6855 /* Do not warn when emulating the MS ABI. */
6856 if ((TREE_CODE (*node) != FUNCTION_TYPE
6857 && TREE_CODE (*node) != METHOD_TYPE)
6858 || ix86_function_type_abi (*node) != MS_ABI)
6859 warning (OPT_Wattributes, "%qE attribute ignored",
6861 *no_add_attrs = true;
6865 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
6866 if (is_attribute_p ("fastcall", name))
6868 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6870 error ("fastcall and cdecl attributes are not compatible");
6872 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6874 error ("fastcall and stdcall attributes are not compatible");
6876 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
6878 error ("fastcall and regparm attributes are not compatible");
6880 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6882 error ("fastcall and thiscall attributes are not compatible");
6886 /* Can combine stdcall with fastcall (redundant), regparm and
6888 else if (is_attribute_p ("stdcall", name))
6890 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6892 error ("stdcall and cdecl attributes are not compatible");
6894 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6896 error ("stdcall and fastcall attributes are not compatible");
6898 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6900 error ("stdcall and thiscall attributes are not compatible");
6904 /* Can combine cdecl with regparm and sseregparm. */
6905 else if (is_attribute_p ("cdecl", name))
6907 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6909 error ("stdcall and cdecl attributes are not compatible");
6911 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6913 error ("fastcall and cdecl attributes are not compatible");
6915 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6917 error ("cdecl and thiscall attributes are not compatible");
6920 else if (is_attribute_p ("thiscall", name))
/* thiscall on a non-method is only diagnosed under -pedantic.  */
6922 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
6923 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
6925 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6927 error ("stdcall and thiscall attributes are not compatible");
6929 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6931 error ("fastcall and thiscall attributes are not compatible");
6933 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6935 error ("cdecl and thiscall attributes are not compatible");
6939 /* Can combine sseregparm with all attributes. */
6944 /* The transactional memory builtins are implicitly regparm or fastcall
6945 depending on the ABI. Override the generic do-nothing attribute that
6946 these builtins were declared with, and replace it with one of the two
6947 attributes that we expect elsewhere. */
/* NOTE(review): elided listing — intervening lines are missing below.  */
6950 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
6951 int flags, bool *no_add_attrs)
6955 /* In no case do we want to add the placeholder attribute. */
6956 *no_add_attrs = true;
6958 /* The 64-bit ABI is unchanged for transactional memory. */
6962 /* ??? Is there a better way to validate 32-bit windows? We have
6963 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
/* CHECK_STACK_LIMIT > 0 is used here as a proxy for 32-bit Windows:
   Windows targets substitute "fastcall", others "regparm(2)".  */
6964 if (CHECK_STACK_LIMIT > 0)
6965 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
6968 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
6969 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
/* Splice the replacement attribute onto the builtin's type.  */
6971 decl_attributes (node, alt, flags);
6976 /* This function determines from TYPE the calling-convention. */
/* Returns a bitmask of IX86_CALLCVT_* flags.  NOTE(review): elided
   listing — some lines (e.g. the 64-bit early-return guard before
   line 6986) are missing; code kept verbatim.  */
6979 ix86_get_callcvt (const_tree type)
6981 unsigned int ret = 0;
6986 return IX86_CALLCVT_CDECL;
/* Translate explicit attributes into convention bits; the base
   conventions (cdecl/stdcall/fastcall/thiscall) are mutually
   exclusive, hence the else-if chain.  */
6988 attrs = TYPE_ATTRIBUTES (type);
6989 if (attrs != NULL_TREE)
6991 if (lookup_attribute ("cdecl", attrs))
6992 ret |= IX86_CALLCVT_CDECL;
6993 else if (lookup_attribute ("stdcall", attrs))
6994 ret |= IX86_CALLCVT_STDCALL;
6995 else if (lookup_attribute ("fastcall", attrs))
6996 ret |= IX86_CALLCVT_FASTCALL;
6997 else if (lookup_attribute ("thiscall", attrs))
6998 ret |= IX86_CALLCVT_THISCALL;
7000 /* Regparm isn't allowed for thiscall and fastcall. */
7001 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
7003 if (lookup_attribute ("regparm", attrs))
7004 ret |= IX86_CALLCVT_REGPARM;
7005 if (lookup_attribute ("sseregparm", attrs))
7006 ret |= IX86_CALLCVT_SSEREGPARM;
/* If an explicit base convention was found, we are done.  */
7009 if (IX86_BASE_CALLCVT(ret) != 0)
/* No explicit convention: -mrtd makes non-stdarg functions stdcall.  */
7013 is_stdarg = stdarg_p (type);
7014 if (TARGET_RTD && !is_stdarg)
7015 return IX86_CALLCVT_STDCALL | ret;
/* Otherwise default to cdecl, except MS-ABI methods, which default
   to thiscall.  */
7019 || TREE_CODE (type) != METHOD_TYPE
7020 || ix86_function_type_abi (type) != MS_ABI)
7021 return IX86_CALLCVT_CDECL | ret;
7023 return IX86_CALLCVT_THISCALL;
7026 /* Return 0 if the attributes for two types are incompatible, 1 if they
7027 are compatible, and 2 if they are nearly compatible (which causes a
7028 warning to be generated). */
/* NOTE(review): elided listing — the early-return and comparison of
   ccvt1/ccvt2 are missing between the lines below.  */
7031 ix86_comp_type_attributes (const_tree type1, const_tree type2)
7033 unsigned int ccvt1, ccvt2;
/* Non-function types have no calling-convention attributes to compare.  */
7035 if (TREE_CODE (type1) != FUNCTION_TYPE
7036 && TREE_CODE (type1) != METHOD_TYPE)
7039 ccvt1 = ix86_get_callcvt (type1);
7040 ccvt2 = ix86_get_callcvt (type2);
/* Differing regparm counts make the types incompatible.  */
7043 if (ix86_function_regparm (type1, NULL)
7044 != ix86_function_regparm (type2, NULL))
7050 /* Return the regparm value for a function with the indicated TYPE and DECL.
7051 DECL may be NULL when calling function indirectly
7052 or considering a libcall. */
/* NOTE(review): elided listing — declaration and brace lines are
   missing between the numbered lines below; code kept verbatim.  */
7055 ix86_function_regparm (const_tree type, const_tree decl)
/* 64-bit: the regparm count is fixed by the ABI in effect.  */
7062 return (ix86_function_type_abi (type) == SYSV_ABI
7063 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
7064 ccvt = ix86_get_callcvt (type);
7065 regparm = ix86_regparm;
/* An explicit regparm(N) attribute overrides the -mregparm default.  */
7067 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
7069 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
7072 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
7076 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
7078 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
7081 /* Use register calling convention for local functions when possible. */
7083 && TREE_CODE (decl) == FUNCTION_DECL)
7085 cgraph_node *target = cgraph_node::get (decl);
7087 target = target->function_symbol ();
7089 /* Caller and callee must agree on the calling convention, so
7090 checking here just optimize means that with
7091 __attribute__((optimize (...))) caller could use regparm convention
7092 and callee not, or vice versa. Instead look at whether the callee
7093 is optimized or not. */
7094 if (target && opt_for_fn (target->decl, optimize)
7095 && !(profile_flag && !flag_fentry))
7097 cgraph_local_info *i = &target->local;
7098 if (i && i->local && i->can_change_signature)
7100 int local_regparm, globals = 0, regno;
7102 /* Make sure no regparm register is taken by a
7103 fixed register variable. */
7104 for (local_regparm = 0; local_regparm < REGPARM_MAX;
7106 if (fixed_regs[local_regparm])
7109 /* We don't want to use regparm(3) for nested functions as
7110 these use a static chain pointer in the third argument. */
7111 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
7114 /* Save a register for the split stack. */
7115 if (local_regparm == 3 && flag_split_stack)
7118 /* Each fixed register usage increases register pressure,
7119 so less registers should be used for argument passing.
7120 This functionality can be overridden by an explicit
7122 for (regno = AX_REG; regno <= DI_REG; regno++)
7123 if (fixed_regs[regno])
7127 = globals < local_regparm ? local_regparm - globals : 0;
7129 if (local_regparm > regparm)
7130 regparm = local_regparm;
7138 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
7139 DFmode (2) arguments in SSE registers for a function with the
7140 indicated TYPE and DECL. DECL may be NULL when calling function
7141 indirectly or considering a libcall. Return -1 if any FP parameter
7142 should be rejected by error. This is used in a situation where we imply
7143 SSE calling convention but the function is called from another function
7144 with SSE disabled. Otherwise return 0. */
/* NOTE(review): elided listing — return statements and braces are
   missing between the numbered lines below.  */
7147 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
/* This hook is only meaningful for the 32-bit ABI.  */
7149 gcc_assert (!TARGET_64BIT);
7151 /* Use SSE registers to pass SFmode and DFmode arguments if requested
7152 by the sseregparm attribute. */
7153 if (TARGET_SSEREGPARM
7154 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* sseregparm without SSE enabled is a hard error (prefer the DECL
   form of the diagnostic when a decl is available).  */
7161 error ("calling %qD with attribute sseregparm without "
7162 "SSE/SSE2 enabled", decl);
7164 error ("calling %qT with attribute sseregparm without "
7165 "SSE/SSE2 enabled", type);
7176 cgraph_node *target = cgraph_node::get (decl);
7178 target = target->function_symbol ();
7180 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
7181 (and DFmode for SSE2) arguments in SSE registers. */
7183 /* TARGET_SSE_MATH */
7184 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
7185 && opt_for_fn (target->decl, optimize)
7186 && !(profile_flag && !flag_fentry))
7188 cgraph_local_info *i = &target->local;
7189 if (i && i->local && i->can_change_signature)
7191 /* Refuse to produce wrong code when local function with SSE enabled
7192 is called from SSE disabled function.
7193 FIXME: We need a way to detect these cases cross-ltrans partition
7194 and avoid using SSE calling conventions on local functions called
7195 from function with SSE disabled. For now at least delay the
7196 warning until we know we are going to produce wrong code.
7198 if (!TARGET_SSE && warn)
/* 2 = SFmode and DFmode in SSE regs (SSE2); 1 = SFmode only.  */
7200 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
7201 ->x_ix86_isa_flags) ? 2 : 1;
7208 /* Return true if EAX is live at the start of the function. Used by
7209 ix86_expand_prologue to determine if we need special help before
7210 calling allocate_stack_worker. */
7213 ix86_eax_live_at_start_p (void)
7215 /* Cheat. Don't bother working forward from ix86_function_regparm
7216 to the function type to whether an actual argument is located in
7217 eax. Instead just look at cfg info, which is still close enough
7218 to correct at this point. This gives false positives for broken
7219 functions that might use uninitialized data that happens to be
7220 allocated in eax, but who cares? */
/* Register number 0 is AX on x86.  */
7221 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
/* Return true when the hidden aggregate-return pointer should be kept
   (i.e. NOT popped by the callee).  NOTE(review): elided listing —
   the return type line and some guards are missing below.  */
7225 ix86_keep_aggregate_return_pointer (tree fntype)
/* An explicit callee_pop_aggregate_return(N) attribute decides:
   N == 0 means keep (caller pops), nonzero means callee pops.  */
7231 attr = lookup_attribute ("callee_pop_aggregate_return",
7232 TYPE_ATTRIBUTES (fntype));
7234 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
7236 /* For 32-bit MS-ABI the default is to keep aggregate
7238 if (ix86_function_type_abi (fntype) == MS_ABI)
7241 return KEEP_AGGREGATE_RETURN_POINTER != 0;
7244 /* Value is the number of bytes of arguments automatically
7245 popped when returning from a subroutine call.
7246 FUNDECL is the declaration node of the function (as a tree),
7247 FUNTYPE is the data type of the function (as a tree),
7248 or for a library call it is an identifier node for the subroutine name.
7249 SIZE is the number of bytes of arguments passed on the stack.
7251 On the 80386, the RTD insn may be used to pop them if the number
7252 of args is fixed, but if the number is variable then the caller
7253 must pop them all. RTD can't be used for library calls now
7254 because the library is compiled with the Unix compiler.
7255 Use of RTD is a selectable option, since it is incompatible with
7256 standard Unix calling sequences. If the option is not selected,
7257 the caller must always pop the args.
7259 The attribute stdcall is equivalent to RTD on a per module basis. */
/* NOTE(review): elided listing — return statements between the
   numbered lines below are missing.  */
7262 ix86_return_pops_args (tree fundecl, tree funtype, int size)
7266 /* None of the 64-bit ABIs pop arguments. */
/* Callee-pop conventions (stdcall/fastcall/thiscall) pop SIZE bytes,
   but only for non-variadic functions.  */
7270 ccvt = ix86_get_callcvt (funtype);
7272 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
7273 | IX86_CALLCVT_THISCALL)) != 0
7274 && ! stdarg_p (funtype))
7277 /* Lose any fake structure return argument if it is passed on the stack. */
7278 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
7279 && !ix86_keep_aggregate_return_pointer (funtype))
7281 int nregs = ix86_function_regparm (funtype, fundecl);
7283 return GET_MODE_SIZE (Pmode);
7289 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
/* NOTE(review): elided listing — brace and return lines are missing
   between the numbered lines below.  */
7292 ix86_legitimate_combined_insn (rtx_insn *insn)
7294 /* Check operand constraints in case hard registers were propagated
7295 into insn pattern. This check prevents combine pass from
7296 generating insn patterns with invalid hard register operands.
7297 These invalid insns can eventually confuse reload to error out
7298 with a spill failure. See also PRs 46829 and 46843. */
/* NOTE(review): assignment inside the condition — appears to cache the
   recog () result into INSN_CODE while testing it; confirm this is
   intended and not a typo for '=='.  */
7299 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
7303 extract_insn (insn);
7304 preprocess_constraints (insn);
7306 int n_operands = recog_data.n_operands;
7307 int n_alternatives = recog_data.n_alternatives;
/* Walk every operand and verify a hard register operand fits at
   least one preferred constraint alternative.  */
7308 for (i = 0; i < n_operands; i++)
7310 rtx op = recog_data.operand[i];
7311 machine_mode mode = GET_MODE (op);
7312 const operand_alternative *op_alt;
7317 /* For pre-AVX disallow unaligned loads/stores where the
7318 instructions don't support it. */
7320 && VECTOR_MODE_P (mode)
7321 && misaligned_operand (op, mode))
7323 unsigned int min_align = get_attr_ssememalign (insn);
7325 || MEM_ALIGN (op) < min_align)
7329 /* A unary operator may be accepted by the predicate, but it
7330 is irrelevant for matching constraints. */
/* Peel a SUBREG of a hard register, tracking the byte offset so the
   class check below sees the real register.  */
7336 if (REG_P (SUBREG_REG (op))
7337 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
7338 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
7339 GET_MODE (SUBREG_REG (op)),
7342 op = SUBREG_REG (op);
/* Only hard-register operands need constraint validation here.  */
7345 if (!(REG_P (op) && HARD_REGISTER_P (op)))
7348 op_alt = recog_op_alt;
7350 /* Operand has no constraints, anything is OK. */
7351 win = !n_alternatives;
7353 alternative_mask preferred = get_preferred_alternatives (insn);
7354 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
7356 if (!TEST_BIT (preferred, j))
7358 if (op_alt[i].anything_ok
7359 || (op_alt[i].matches != -1
7361 (recog_data.operand[i],
7362 recog_data.operand[op_alt[i].matches]))
7363 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
7378 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
/* Shadow-memory base offset for AddressSanitizer: distinct constants
   for LP64 Mach-O, other LP64, and 32-bit targets.  */
7380 static unsigned HOST_WIDE_INT
7381 ix86_asan_shadow_offset (void)
7383 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
7384 : HOST_WIDE_INT_C (0x7fff8000))
7385 : (HOST_WIDE_INT_1 << 29);
7388 /* Argument support functions. */
7390 /* Return true when register may be used to pass function parameters. */
/* NOTE(review): elided listing — return-type line, braces and some
   returns are missing between the numbered lines below.  */
7392 ix86_function_arg_regno_p (int regno)
7395 enum calling_abi call_abi;
7396 const int *parm_regs;
/* Bound registers can carry pointer-bounds arguments under MPX.  */
7398 if (TARGET_MPX && BND_REGNO_P (regno))
/* 64-bit: integer regparm registers plus non-fixed SSE registers.  */
7404 return (regno < REGPARM_MAX
7405 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]))
7407 return (regno < REGPARM_MAX
7408 || (TARGET_MMX && MMX_REGNO_P (regno)
7409 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
7410 || (TARGET_SSE && SSE_REGNO_P (regno)
7411 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
7414 if (TARGET_SSE && SSE_REGNO_P (regno)
7415 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
7418 /* TODO: The function should depend on current function ABI but
7419 builtins.c would need updating then. Therefore we use the
7421 call_abi = ix86_cfun_abi ();
7423 /* RAX is used as hidden argument to va_arg functions. */
7424 if (call_abi == SYSV_ABI && regno == AX_REG)
/* Otherwise check the ABI-specific integer parameter register list.  */
7427 if (call_abi == MS_ABI)
7428 parm_regs = x86_64_ms_abi_int_parameter_registers;
7430 parm_regs = x86_64_int_parameter_registers;
7432 for (i = 0; i < (call_abi == MS_ABI
7433 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
7434 if (regno == parm_regs[i])
7439 /* Return if we do not know how to pass TYPE solely in registers. */
7442 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
/* Defer first to the generic variable-size/padding test.  */
7444 if (must_pass_in_stack_var_size_or_pad (mode, type))
7447 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
7448 The layout_type routine is crafty and tries to trick us into passing
7449 currently unsupported vector types on the stack by using TImode. */
7450 return (!TARGET_64BIT && mode == TImode
7451 && type && TREE_CODE (type) != VECTOR_TYPE);
7454 /* It returns the size, in bytes, of the area reserved for arguments passed
7455 in registers for the function represented by fndecl dependent to the used
/* Returns the MS-ABI 64-bit "home" area size when applicable;
   NOTE(review): the final return lines are elided from this listing.  */
7458 ix86_reg_parm_stack_space (const_tree fndecl)
7460 enum calling_abi call_abi = SYSV_ABI;
/* FNDECL may be a decl or (for libcalls) a type node.  */
7461 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
7462 call_abi = ix86_function_abi (fndecl);
7464 call_abi = ix86_function_type_abi (fndecl);
7465 if (TARGET_64BIT && call_abi == MS_ABI)
7470 /* We add this as a workaround in order to use libc_has_function
/* Thin forwarding wrapper around the target hook.  */
7473 ix86_libc_has_function (enum function_class fn_class)
7475 return targetm.libc_has_function (fn_class);
7478 /* Returns value SYSV_ABI, MS_ABI dependent on fntype,
7479 specifying the call abi used. */
7481 ix86_function_type_abi (const_tree fntype)
7483 enum calling_abi abi = ix86_abi;
/* Without attributes the default ABI stands.  */
7485 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
/* ms_abi flips a SYSV default to MS_ABI; X32 cannot do MS_ABI.  */
7489 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
7492 error ("X32 does not support ms_abi attribute");
7496 else if (abi == MS_ABI
7497 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
/* Return the calling ABI of FNDECL, or the default ABI when NULL.  */
7503 static enum calling_abi
7504 ix86_function_abi (const_tree fndecl)
7506 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
7509 /* Returns value SYSV_ABI, MS_ABI dependent on cfun,
7510 specifying the call abi used. */
/* Falls back to the default ABI when no function is being compiled.  */
7512 ix86_cfun_abi (void)
7514 return cfun ? cfun->machine->call_abi : ix86_abi;
/* Return true when FN carries the ms_hook_prologue attribute; it is
   rejected (with an error) on nested functions.  NOTE(review): the
   return-type line and closing returns are elided from this listing.  */
7518 ix86_function_ms_hook_prologue (const_tree fn)
7520 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
7522 if (decl_function_context (fn) != NULL_TREE)
7523 error_at (DECL_SOURCE_LOCATION (fn),
7524 "ms_hook_prologue is not compatible with nested function");
7531 /* Write the extra assembler code needed to declare a function properly. */
7534 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
7537 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
/* Emit a 0xCC (int3) filler region before the label so a hot-patch
   jump can be planted there: 32 bytes on 64-bit, 16 on 32-bit.  */
7541 int i, filler_count = (TARGET_64BIT ? 32 : 16);
7542 unsigned int filler_cc = 0xcccccccc;
7544 for (i = 0; i < filler_count; i += 4)
7545 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
7548 #ifdef SUBTARGET_ASM_UNWIND_INIT
7549 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
7552 ASM_OUTPUT_LABEL (asm_out_file, fname);
7554 /* Output magic byte marker, if hot-patch attribute is set. */
7559 /* leaq [%rsp + 0], %rsp */
7560 asm_fprintf (asm_out_file, ASM_BYTE
7561 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
7565 /* movl.s %edi, %edi
7567 movl.s %esp, %ebp */
7568 asm_fprintf (asm_out_file, ASM_BYTE
7569 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
7575 extern void init_regs (void);
7577 /* Implementation of call abi switching target hook. Specific to FNDECL
7578 the specific call register sets are set. See also
7579 ix86_conditional_register_usage for more details. */
/* Record FNDECL's ABI on the function being compiled.  */
7581 ix86_call_abi_override (const_tree fndecl)
7583 cfun->machine->call_abi = ix86_function_abi (fndecl);
7586 /* Return 1 if pseudo register should be created and used to hold
7587 GOT address for PIC code. */
/* NOTE(review): elided listing — the surrounding condition and return
   lines are missing below.  */
7589 ix86_use_pseudo_pic_reg (void)
7592 && (ix86_cmodel == CM_SMALL_PIC
7599 /* Initialize large model PIC register. */
7602 ix86_init_large_pic_reg (unsigned int tmp_regno)
7604 rtx_code_label *label;
/* Large PIC model is 64-bit only.  */
7607 gcc_assert (Pmode == DImode);
7608 label = gen_label_rtx ();
7610 LABEL_PRESERVE_P (label) = 1;
7611 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
/* TMP_REGNO must not alias the PIC register being initialized.  */
7612 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
/* pic_reg = &label; tmp = _GLOBAL_OFFSET_TABLE_ - label; pic_reg += tmp.  */
7613 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
7615 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
7616 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
7617 pic_offset_table_rtx, tmp_reg));
7620 /* Create and initialize PIC register if required. */
/* NOTE(review): elided listing — brace/sequence-start lines are
   missing between the numbered lines below.  */
7622 ix86_init_pic_reg (void)
7627 if (!ix86_use_pseudo_pic_reg ())
7634 if (ix86_cmodel == CM_LARGE_PIC)
7635 ix86_init_large_pic_reg (R11_REG);
7637 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
7641 /* If there is future mcount call in the function it is more profitable
7642 to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM. */
7643 rtx reg = crtl->profile
7644 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
7645 : pic_offset_table_rtx;
7646 rtx_insn *insn = emit_insn (gen_set_got (reg));
7647 RTX_FRAME_RELATED_P (insn) = 1;
7649 emit_move_insn (pic_offset_table_rtx, reg);
7650 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
/* Splice the initialization sequence onto the entry edge so it runs
   before any other code in the function.  */
7656 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7657 insert_insn_on_edge (seq, entry_edge);
7658 commit_one_edge_insertion (entry_edge);
7661 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7662 for a call to a function whose data type is FNTYPE.
7663 For a library call, FNTYPE is 0. */
/* NOTE(review): elided listing — parameter lines (fndecl, caller) and
   braces are missing between the numbered lines below.  */
7666 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
7667 tree fntype, /* tree ptr for function decl */
7668 rtx libname, /* SYMBOL_REF of library name or 0 */
7672 struct cgraph_local_info *i = NULL;
7673 struct cgraph_node *target = NULL;
7675 memset (cum, 0, sizeof (*cum));
/* Prefer the real call target's ABI (through aliases) when a decl is
   available; otherwise fall back to the type's ABI.  */
7679 target = cgraph_node::get (fndecl);
7682 target = target->function_symbol ();
7683 i = cgraph_node::local_info (target->decl);
7684 cum->call_abi = ix86_function_abi (target->decl);
7687 cum->call_abi = ix86_function_abi (fndecl);
7690 cum->call_abi = ix86_function_type_abi (fntype);
7692 cum->caller = caller;
7694 /* Set up the number of registers to use for passing arguments. */
7695 cum->nregs = ix86_regparm;
7698 cum->nregs = (cum->call_abi == SYSV_ABI
7699 ? X86_64_REGPARM_MAX
7700 : X86_64_MS_REGPARM_MAX);
7704 cum->sse_nregs = SSE_REGPARM_MAX;
7707 cum->sse_nregs = (cum->call_abi == SYSV_ABI
7708 ? X86_64_SSE_REGPARM_MAX
7709 : X86_64_MS_SSE_REGPARM_MAX);
7713 cum->mmx_nregs = MMX_REGPARM_MAX;
/* Default to warning about every class of vector-ABI mismatch.  */
7714 cum->warn_avx512f = true;
7715 cum->warn_avx = true;
7716 cum->warn_sse = true;
7717 cum->warn_mmx = true;
7719 /* Because type might mismatch in between caller and callee, we need to
7720 use actual type of function for local calls.
7721 FIXME: cgraph_analyze can be told to actually record if function uses
7722 va_start so for local functions maybe_vaarg can be made aggressive
7724 FIXME: once type system is fixed, we won't need this code anymore. */
7725 if (i && i->local && i->can_change_signature)
7726 fntype = TREE_TYPE (target->decl);
7727 cum->stdarg = stdarg_p (fntype);
7728 cum->maybe_vaarg = (fntype
7729 ? (!prototype_p (fntype) || stdarg_p (fntype))
/* MPX pointer-bounds bookkeeping starts from the first bound reg.  */
7732 cum->bnd_regno = FIRST_BND_REG;
7733 cum->bnds_in_bt = 0;
7734 cum->force_bnd_pass = 0;
7739 /* If there are variable arguments, then we won't pass anything
7740 in registers in 32-bit mode. */
7741 if (stdarg_p (fntype))
7744 /* Since in 32-bit, variable arguments are always passed on
7745 stack, there is scratch register available for indirect
7747 cfun->machine->arg_reg_available = true;
7750 cum->warn_avx512f = false;
7751 cum->warn_avx = false;
7752 cum->warn_sse = false;
7753 cum->warn_mmx = false;
7757 /* Use ecx and edx registers if function has fastcall attribute,
7758 else look for regparm information. */
7761 unsigned int ccvt = ix86_get_callcvt (fntype);
7762 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
7765 cum->fastcall = 1; /* Same first register as in fastcall. */
7767 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
7773 cum->nregs = ix86_function_regparm (fntype, fndecl);
7776 /* Set up the number of SSE registers used for passing SFmode
7777 and DFmode arguments. Warn for mismatching ABI. */
7778 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
7781 cfun->machine->arg_reg_available = (cum->nregs > 0);
7784 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
7785 But in the case of vector types, it is some vector mode.
7787 When we have only some of our vector isa extensions enabled, then there
7788 are some modes for which vector_mode_supported_p is false. For these
7789 modes, the generic vector support in gcc will choose some non-vector mode
7790 in order to implement the type. By computing the natural mode, we'll
7791 select the proper ABI location for the operand and not depend on whatever
7792 the middle-end decides to do with these vector types.
7794 The middle-end can't deal with the vector types > 16 bytes. In this
7795 case, we return the original mode and warn ABI change if CUM isn't
7798 If INT_RETURN is true, warn ABI change if the vector mode isn't
7799 available for function return value. */
/* NOTE(review): elided listing — braces and several return/warn lines
   are missing between the numbered lines below.  */
7802 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
7805 machine_mode mode = TYPE_MODE (type);
/* Only vector types whose TYPE_MODE is not already a vector mode need
   the natural-mode search below.  */
7807 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
7809 HOST_WIDE_INT size = int_size_in_bytes (type);
7810 if ((size == 8 || size == 16 || size == 32 || size == 64)
7811 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
7812 && TYPE_VECTOR_SUBPARTS (type) > 1)
7814 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
7816 /* There are no XFmode vector modes. */
7817 if (innermode == XFmode)
7820 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
7821 mode = MIN_MODE_VECTOR_FLOAT;
7823 mode = MIN_MODE_VECTOR_INT;
7825 /* Get the mode which has this inner mode and number of units. */
7826 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
7827 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
7828 && GET_MODE_INNER (mode) == innermode)
/* Each ISA tier warns at most once per compilation, separately for
   arguments and for return values (the static flags below).  */
7830 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
7832 static bool warnedavx512f;
7833 static bool warnedavx512f_ret;
7835 if (cum && cum->warn_avx512f && !warnedavx512f)
7837 if (warning (OPT_Wpsabi, "AVX512F vector argument "
7838 "without AVX512F enabled changes the ABI"))
7839 warnedavx512f = true;
7841 else if (in_return && !warnedavx512f_ret)
7843 if (warning (OPT_Wpsabi, "AVX512F vector return "
7844 "without AVX512F enabled changes the ABI"))
7845 warnedavx512f_ret = true;
7848 return TYPE_MODE (type);
7850 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
7852 static bool warnedavx;
7853 static bool warnedavx_ret;
7855 if (cum && cum->warn_avx && !warnedavx)
7857 if (warning (OPT_Wpsabi, "AVX vector argument "
7858 "without AVX enabled changes the ABI"))
7861 else if (in_return && !warnedavx_ret)
7863 if (warning (OPT_Wpsabi, "AVX vector return "
7864 "without AVX enabled changes the ABI"))
7865 warnedavx_ret = true;
7868 return TYPE_MODE (type);
7870 else if (((size == 8 && TARGET_64BIT) || size == 16)
7874 static bool warnedsse;
7875 static bool warnedsse_ret;
7877 if (cum && cum->warn_sse && !warnedsse)
7879 if (warning (OPT_Wpsabi, "SSE vector argument "
7880 "without SSE enabled changes the ABI"))
7883 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
7885 if (warning (OPT_Wpsabi, "SSE vector return "
7886 "without SSE enabled changes the ABI"))
7887 warnedsse_ret = true;
7890 else if ((size == 8 && !TARGET_64BIT)
7894 static bool warnedmmx;
7895 static bool warnedmmx_ret;
7897 if (cum && cum->warn_mmx && !warnedmmx)
7899 if (warning (OPT_Wpsabi, "MMX vector argument "
7900 "without MMX enabled changes the ABI"))
7903 else if (in_return && !warnedmmx_ret)
7905 if (warning (OPT_Wpsabi, "MMX vector return "
7906 "without MMX enabled changes the ABI"))
7907 warnedmmx_ret = true;
7920 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
7921 this may not agree with the mode that the type system has chosen for the
7922 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
7923 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
7926 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
7931 if (orig_mode != BLKmode)
7932 tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap the natural-mode register in a one-element PARALLEL
   at offset 0.  */
7935 tmp = gen_rtx_REG (mode, regno);
7936 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
7937 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
7943 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
7944 of this code is to classify each 8bytes of incoming argument by the register
7945 class and assign registers accordingly. */
7947 /* Return the union class of CLASS1 and CLASS2.
7948 See the x86-64 PS ABI for details. */
/* Implements the psABI classification-merge rules in order; the rule
   numbers in the comments below match the ABI document.  */
7950 static enum x86_64_reg_class
7951 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
7953 /* Rule #1: If both classes are equal, this is the resulting class. */
7954 if (class1 == class2)
7957 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
7959 if (class1 == X86_64_NO_CLASS)
7961 if (class2 == X86_64_NO_CLASS)
7964 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
7965 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
7966 return X86_64_MEMORY_CLASS;
7968 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF stays INTEGERSI (both halves fit in 32 bits).  */
7969 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
7970 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
7971 return X86_64_INTEGERSI_CLASS;
7972 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
7973 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
7974 return X86_64_INTEGER_CLASS;
7976 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
7978 if (class1 == X86_64_X87_CLASS
7979 || class1 == X86_64_X87UP_CLASS
7980 || class1 == X86_64_COMPLEX_X87_CLASS
7981 || class2 == X86_64_X87_CLASS
7982 || class2 == X86_64_X87UP_CLASS
7983 || class2 == X86_64_COMPLEX_X87_CLASS)
7984 return X86_64_MEMORY_CLASS;
7986 /* Rule #6: Otherwise class SSE is used. */
7987 return X86_64_SSE_CLASS;
7990 /* Classify the argument of type TYPE and mode MODE.
7991 CLASSES will be filled by the register class used to pass each word
7992 of the operand. The number of words is returned. In case the parameter
7993 should be passed in memory, 0 is returned. As a special case for zero
7994 sized containers, classes[0] will be NO_CLASS and 1 is returned.
7996 BIT_OFFSET is used internally for handling records and specifies offset
7997 of the offset in bits modulo 512 to avoid overflow cases.
7999 See the x86-64 PS ABI for details.
/* NOTE(review): this excerpt is elided -- many original lines are missing
   between the numbered statements.  Code left byte-identical; only comments
   added.  Classifies an argument of MODE/TYPE into x86-64 psABI register
   classes, one class per eightbyte, writing into CLASSES and returning the
   number of eightbytes (0 = pass in memory).  */
8003 classify_argument (machine_mode mode, const_tree type,
8004 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
8006 HOST_WIDE_INT bytes =
8007 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
/* Number of eightbytes the value occupies, counting the sub-word offset.  */
8008 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
8010 /* Variable sized entities are always passed/returned in memory. */
8014 if (mode != VOIDmode
8015 && targetm.calls.must_pass_in_stack (mode, type))
8018 if (type && AGGREGATE_TYPE_P (type))
8022 enum x86_64_reg_class subclasses[MAX_CLASSES];
8024 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
/* Start with every eightbyte unclassified; fields are merged in below.  */
8028 for (i = 0; i < words; i++)
8029 classes[i] = X86_64_NO_CLASS;
8031 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
8032 signalize memory class, so handle it as special case. */
8035 classes[0] = X86_64_NO_CLASS;
8039 /* Classify each field of record and merge classes. */
8040 switch (TREE_CODE (type))
8043 /* And now merge the fields of structure. */
8044 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8046 if (TREE_CODE (field) == FIELD_DECL)
8050 if (TREE_TYPE (field) == error_mark_node)
8053 /* Bitfields are always classified as integer. Handle them
8054 early, since later code would consider them to be
8055 misaligned integers. */
8056 if (DECL_BIT_FIELD (field))
/* Merge INTEGER into every eightbyte overlapped by the bitfield
   (index = bit position / 8 bits / 8 bytes).  */
8058 for (i = (int_bit_position (field)
8059 + (bit_offset % 64)) / 8 / 8;
8060 i < ((int_bit_position (field) + (bit_offset % 64))
8061 + tree_to_shwi (DECL_SIZE (field))
8064 merge_classes (X86_64_INTEGER_CLASS,
8071 type = TREE_TYPE (field);
8073 /* Flexible array member is ignored. */
8074 if (TYPE_MODE (type) == BLKmode
8075 && TREE_CODE (type) == ARRAY_TYPE
8076 && TYPE_SIZE (type) == NULL_TREE
8077 && TYPE_DOMAIN (type) != NULL_TREE
8078 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
8083 if (!warned && warn_psabi)
8086 inform (input_location,
8087 "the ABI of passing struct with"
8088 " a flexible array member has"
8089 " changed in GCC 4.4");
/* Recurse into the field; offset kept modulo 512 bits (see header
   comment of this function) to avoid overflow.  */
8093 num = classify_argument (TYPE_MODE (type), type,
8095 (int_bit_position (field)
8096 + bit_offset) % 512);
8099 pos = (int_bit_position (field)
8100 + (bit_offset % 64)) / 8 / 8;
8101 for (i = 0; i < num && (i + pos) < words; i++)
8103 merge_classes (subclasses[i], classes[i + pos]);
8110 /* Arrays are handled as small records. */
8113 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
8114 TREE_TYPE (type), subclasses, bit_offset);
8118 /* The partial classes are now full classes. */
8119 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
8120 subclasses[0] = X86_64_SSE_CLASS;
8121 if (subclasses[0] == X86_64_INTEGERSI_CLASS
8122 && !((bit_offset % 64) == 0 && bytes == 4))
8123 subclasses[0] = X86_64_INTEGER_CLASS;
/* Replicate the element classification across the whole array.  */
8125 for (i = 0; i < words; i++)
8126 classes[i] = subclasses[i % num];
8131 case QUAL_UNION_TYPE:
8132 /* Unions are similar to RECORD_TYPE but offset is always 0.
8134 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8136 if (TREE_CODE (field) == FIELD_DECL)
8140 if (TREE_TYPE (field) == error_mark_node)
8143 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
8144 TREE_TYPE (field), subclasses,
8148 for (i = 0; i < num && i < words; i++)
8149 classes[i] = merge_classes (subclasses[i], classes[i]);
8160 /* When size > 16 bytes, if the first one isn't
8161 X86_64_SSE_CLASS or any other ones aren't
8162 X86_64_SSEUP_CLASS, everything should be passed in
8164 if (classes[0] != X86_64_SSE_CLASS)
8167 for (i = 1; i < words; i++)
8168 if (classes[i] != X86_64_SSEUP_CLASS)
8172 /* Final merger cleanup. */
8173 for (i = 0; i < words; i++)
8175 /* If one class is MEMORY, everything should be passed in
8177 if (classes[i] == X86_64_MEMORY_CLASS)
8180 /* The X86_64_SSEUP_CLASS should be always preceded by
8181 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
8182 if (classes[i] == X86_64_SSEUP_CLASS
8183 && classes[i - 1] != X86_64_SSE_CLASS
8184 && classes[i - 1] != X86_64_SSEUP_CLASS)
8186 /* The first one should never be X86_64_SSEUP_CLASS. */
8187 gcc_assert (i != 0);
8188 classes[i] = X86_64_SSE_CLASS;
8191 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
8192 everything should be passed in memory. */
8193 if (classes[i] == X86_64_X87UP_CLASS
8194 && (classes[i - 1] != X86_64_X87_CLASS))
8198 /* The first one should never be X86_64_X87UP_CLASS. */
8199 gcc_assert (i != 0);
8200 if (!warned && warn_psabi)
8203 inform (input_location,
8204 "the ABI of passing union with long double"
8205 " has changed in GCC 4.4");
8213 /* Compute alignment needed. We align all types to natural boundaries with
8214 exception of XFmode that is aligned to 64bits. */
8215 if (mode != VOIDmode && mode != BLKmode)
8217 int mode_alignment = GET_MODE_BITSIZE (mode);
8220 mode_alignment = 128;
8221 else if (mode == XCmode)
8222 mode_alignment = 256;
8223 if (COMPLEX_MODE_P (mode))
8224 mode_alignment /= 2;
8225 /* Misaligned fields are always returned in memory. */
8226 if (bit_offset % mode_alignment)
8230 /* for V1xx modes, just use the base mode */
8231 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
8232 && GET_MODE_UNIT_SIZE (mode) == bytes)
8233 mode = GET_MODE_INNER (mode);
8235 /* Classification of atomic types. */
8240 classes[0] = X86_64_SSE_CLASS;
8243 classes[0] = X86_64_SSE_CLASS;
8244 classes[1] = X86_64_SSEUP_CLASS;
/* Scalar integer modes: decide INTEGERSI vs INTEGER per eightbyte by
   where the value falls within the (last) 128 bits.  */
8254 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
8256 /* Analyze last 128 bits only. */
8257 size = (size - 1) & 0x7f;
8261 classes[0] = X86_64_INTEGERSI_CLASS;
8266 classes[0] = X86_64_INTEGER_CLASS;
8269 else if (size < 64+32)
8271 classes[0] = X86_64_INTEGER_CLASS;
8272 classes[1] = X86_64_INTEGERSI_CLASS;
8275 else if (size < 64+64)
8277 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
8285 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
8289 /* OImode shouldn't be used directly. */
/* SFmode: SSESF only when the float is aligned at the start of an
   eightbyte, otherwise plain SSE.  */
8294 if (!(bit_offset % 64))
8295 classes[0] = X86_64_SSESF_CLASS;
8297 classes[0] = X86_64_SSE_CLASS;
8300 classes[0] = X86_64_SSEDF_CLASS;
8303 classes[0] = X86_64_X87_CLASS;
8304 classes[1] = X86_64_X87UP_CLASS;
8307 classes[0] = X86_64_SSE_CLASS;
8308 classes[1] = X86_64_SSEUP_CLASS;
8311 classes[0] = X86_64_SSE_CLASS;
8312 if (!(bit_offset % 64))
8318 if (!warned && warn_psabi)
8321 inform (input_location,
8322 "the ABI of passing structure with complex float"
8323 " member has changed in GCC 4.4");
8325 classes[1] = X86_64_SSESF_CLASS;
8329 classes[0] = X86_64_SSEDF_CLASS;
8330 classes[1] = X86_64_SSEDF_CLASS;
8333 classes[0] = X86_64_COMPLEX_X87_CLASS;
8336 /* This modes is larger than 16 bytes. */
/* 32-byte vectors: SSE eightbyte followed by three SSEUP.  */
8344 classes[0] = X86_64_SSE_CLASS;
8345 classes[1] = X86_64_SSEUP_CLASS;
8346 classes[2] = X86_64_SSEUP_CLASS;
8347 classes[3] = X86_64_SSEUP_CLASS;
/* 64-byte vectors: SSE eightbyte followed by seven SSEUP.  */
8355 classes[0] = X86_64_SSE_CLASS;
8356 classes[1] = X86_64_SSEUP_CLASS;
8357 classes[2] = X86_64_SSEUP_CLASS;
8358 classes[3] = X86_64_SSEUP_CLASS;
8359 classes[4] = X86_64_SSEUP_CLASS;
8360 classes[5] = X86_64_SSEUP_CLASS;
8361 classes[6] = X86_64_SSEUP_CLASS;
8362 classes[7] = X86_64_SSEUP_CLASS;
8370 classes[0] = X86_64_SSE_CLASS;
8371 classes[1] = X86_64_SSEUP_CLASS;
8379 classes[0] = X86_64_SSE_CLASS;
8385 gcc_assert (VECTOR_MODE_P (mode));
8390 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
8392 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
8393 classes[0] = X86_64_INTEGERSI_CLASS;
8395 classes[0] = X86_64_INTEGER_CLASS;
8396 classes[1] = X86_64_INTEGER_CLASS;
8397 return 1 + (bytes > 8);
8401 /* Examine the argument and return set number of register required in each
8402 class. Return true iff parameter should be passed in memory. */
/* NOTE(review): elided excerpt -- the per-case register tallies between the
   case labels are missing from view; comments only, code untouched.
   Counts how many integer and SSE registers the classified argument needs,
   storing them through INT_NREGS / SSE_NREGS.  */
8405 examine_argument (machine_mode mode, const_tree type, int in_return,
8406 int *int_nregs, int *sse_nregs)
8408 enum x86_64_reg_class regclass[MAX_CLASSES];
8409 int n = classify_argument (mode, type, regclass, 0);
/* Walk eightbyte classes from last to first, tallying register needs.  */
8416 for (n--; n >= 0; n--)
8417 switch (regclass[n])
8419 case X86_64_INTEGER_CLASS:
8420 case X86_64_INTEGERSI_CLASS:
8423 case X86_64_SSE_CLASS:
8424 case X86_64_SSESF_CLASS:
8425 case X86_64_SSEDF_CLASS:
/* NO_CLASS / SSEUP consume no additional register of their own.  */
8428 case X86_64_NO_CLASS:
8429 case X86_64_SSEUP_CLASS:
8431 case X86_64_X87_CLASS:
8432 case X86_64_X87UP_CLASS:
8433 case X86_64_COMPLEX_X87_CLASS:
8437 case X86_64_MEMORY_CLASS:
8444 /* Construct container for the argument used by GCC interface. See
8445 FUNCTION_ARG for the detailed description. */
/* NOTE(review): elided excerpt -- intermediate lines are missing; comments
   only, code untouched.  Builds the RTL (single REG or PARALLEL of
   EXPR_LISTs) describing where an argument/return value classified by
   classify_argument lives; NULL means "pass in memory".  */
8448 construct_container (machine_mode mode, machine_mode orig_mode,
8449 const_tree type, int in_return, int nintregs, int nsseregs,
8450 const int *intreg, int sse_regno)
8452 /* The following variables hold the static issued_error state. */
/* Static so each diagnostic below is emitted at most once per run.  */
8453 static bool issued_sse_arg_error;
8454 static bool issued_sse_ret_error;
8455 static bool issued_x87_ret_error;
8457 machine_mode tmpmode;
8459 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
8460 enum x86_64_reg_class regclass[MAX_CLASSES];
8464 int needed_sseregs, needed_intregs;
8465 rtx exp[MAX_CLASSES];
8468 n = classify_argument (mode, type, regclass, 0);
8471 if (examine_argument (mode, type, in_return, &needed_intregs,
/* Not enough free registers of the right kind: fall back to memory.  */
8474 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
8477 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
8478 some less clueful developer tries to use floating-point anyway. */
8479 if (needed_sseregs && !TARGET_SSE)
8483 if (!issued_sse_ret_error)
8485 error ("SSE register return with SSE disabled");
8486 issued_sse_ret_error = true;
8489 else if (!issued_sse_arg_error)
8491 error ("SSE register argument with SSE disabled");
8492 issued_sse_arg_error = true;
8497 /* Likewise, error if the ABI requires us to return values in the
8498 x87 registers and the user specified -mno-80387. */
8499 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
8500 for (i = 0; i < n; i++)
8501 if (regclass[i] == X86_64_X87_CLASS
8502 || regclass[i] == X86_64_X87UP_CLASS
8503 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
8505 if (!issued_x87_ret_error)
8507 error ("x87 register return with x87 disabled");
8508 issued_x87_ret_error = true;
8513 /* First construct simple cases. Avoid SCmode, since we want to use
8514 single register to pass this type. */
8515 if (n == 1 && mode != SCmode)
8516 switch (regclass[0])
8518 case X86_64_INTEGER_CLASS:
8519 case X86_64_INTEGERSI_CLASS:
8520 return gen_rtx_REG (mode, intreg[0]);
8521 case X86_64_SSE_CLASS:
8522 case X86_64_SSESF_CLASS:
8523 case X86_64_SSEDF_CLASS:
8524 if (mode != BLKmode)
8525 return gen_reg_or_parallel (mode, orig_mode,
8526 SSE_REGNO (sse_regno))
8528 case X86_64_X87_CLASS:
8529 case X86_64_COMPLEX_X87_CLASS:
8530 return gen_rtx_REG (mode, FIRST_STACK_REG);
8531 case X86_64_NO_CLASS:
8532 /* Zero sized array, struct or class. */
/* 16-byte value entirely in one vector register (SSE + SSEUP).  */
8538 && regclass[0] == X86_64_SSE_CLASS
8539 && regclass[1] == X86_64_SSEUP_CLASS
8541 return gen_reg_or_parallel (mode, orig_mode,
8542 SSE_REGNO (sse_regno))
/* 32-byte value in a single YMM register (SSE + 3x SSEUP).  */
8544 && regclass[0] == X86_64_SSE_CLASS
8545 && regclass[1] == X86_64_SSEUP_CLASS
8546 && regclass[2] == X86_64_SSEUP_CLASS
8547 && regclass[3] == X86_64_SSEUP_CLASS
8549 return gen_reg_or_parallel (mode, orig_mode,
8550 SSE_REGNO (sse_regno))
/* 64-byte value in a single ZMM register (SSE + 7x SSEUP).  */
8552 && regclass[0] == X86_64_SSE_CLASS
8553 && regclass[1] == X86_64_SSEUP_CLASS
8554 && regclass[2] == X86_64_SSEUP_CLASS
8555 && regclass[3] == X86_64_SSEUP_CLASS
8556 && regclass[4] == X86_64_SSEUP_CLASS
8557 && regclass[5] == X86_64_SSEUP_CLASS
8558 && regclass[6] == X86_64_SSEUP_CLASS
8559 && regclass[7] == X86_64_SSEUP_CLASS
8561 return gen_reg_or_parallel (mode, orig_mode,
8562 SSE_REGNO (sse_regno))
8564 && regclass[0] == X86_64_X87_CLASS
8565 && regclass[1] == X86_64_X87UP_CLASS)
8566 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
/* Two consecutive integer registers can carry CDImode/TImode whole.  */
8569 && regclass[0] == X86_64_INTEGER_CLASS
8570 && regclass[1] == X86_64_INTEGER_CLASS
8571 && (mode == CDImode || mode == TImode)
8572 && intreg[0] + 1 == intreg[1])
8573 return gen_rtx_REG (mode, intreg[0]);
8575 /* Otherwise figure out the entries of the PARALLEL. */
8576 for (i = 0; i < n; i++)
8580 switch (regclass[i])
8582 case X86_64_NO_CLASS:
8584 case X86_64_INTEGER_CLASS:
8585 case X86_64_INTEGERSI_CLASS:
8586 /* Merge TImodes on aligned occasions here too. */
8587 if (i * 8 + 8 > bytes)
8589 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
8590 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
8594 /* We've requested 24 bytes we
8595 don't have mode for. Use DImode. */
8596 if (tmpmode == BLKmode)
8599 = gen_rtx_EXPR_LIST (VOIDmode,
8600 gen_rtx_REG (tmpmode, *intreg),
8604 case X86_64_SSESF_CLASS:
8606 = gen_rtx_EXPR_LIST (VOIDmode,
8607 gen_rtx_REG (SFmode,
8608 SSE_REGNO (sse_regno)),
8612 case X86_64_SSEDF_CLASS:
8614 = gen_rtx_EXPR_LIST (VOIDmode,
8615 gen_rtx_REG (DFmode,
8616 SSE_REGNO (sse_regno)),
8620 case X86_64_SSE_CLASS:
/* Consume the trailing SSEUP eightbytes into one wide SSE entry.  */
8628 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
8638 && regclass[1] == X86_64_SSEUP_CLASS
8639 && regclass[2] == X86_64_SSEUP_CLASS
8640 && regclass[3] == X86_64_SSEUP_CLASS);
8646 && regclass[1] == X86_64_SSEUP_CLASS
8647 && regclass[2] == X86_64_SSEUP_CLASS
8648 && regclass[3] == X86_64_SSEUP_CLASS
8649 && regclass[4] == X86_64_SSEUP_CLASS
8650 && regclass[5] == X86_64_SSEUP_CLASS
8651 && regclass[6] == X86_64_SSEUP_CLASS
8652 && regclass[7] == X86_64_SSEUP_CLASS);
8660 = gen_rtx_EXPR_LIST (VOIDmode,
8661 gen_rtx_REG (tmpmode,
8662 SSE_REGNO (sse_regno)),
8671 /* Empty aligned struct, union or class. */
8675 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
8676 for (i = 0; i < nexps; i++)
8677 XVECEXP (ret, 0, i) = exp [i];
8681 /* Update the data in CUM to advance over an argument of mode MODE
8682 and data type TYPE. (TYPE is null for libcalls where that information
8683 may not be available.)
8685 Return a number of integer regsiters advanced over. */
/* NOTE(review): elided excerpt -- comments only, code untouched.
   Advances CUM past one 32-bit-ABI argument; updates the integer, SSE and
   MMX register/word counters.  */
8688 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8689 const_tree type, HOST_WIDE_INT bytes,
8690 HOST_WIDE_INT words)
/* NOTE(review): initializing a bool with NULL -- works (NULL converts to
   false) but should read "bool error_p = false;" as in function_arg_32.  */
8693 bool error_p = NULL;
8697 /* Intel MCU psABI passes scalars and aggregates no larger than 8
8698 bytes in registers. */
8699 if (!VECTOR_MODE_P (mode) && bytes <= 8)
8719 cum->words += words;
8720 cum->nregs -= words;
8721 cum->regno += words;
8722 if (cum->nregs >= 0)
/* No integer registers left for further args.  */
8724 if (cum->nregs <= 0)
8727 cfun->machine->arg_reg_available = false;
8733 /* OImode shouldn't be used directly. */
/* float_in_sse == -1 signals an SSE-regparm request without SSE enabled;
   the error branch at the bottom reports it.  */
8737 if (cum->float_in_sse == -1)
8739 if (cum->float_in_sse < 2)
8742 if (cum->float_in_sse == -1)
8744 if (cum->float_in_sse < 1)
8767 if (!type || !AGGREGATE_TYPE_P (type))
8769 cum->sse_words += words;
8770 cum->sse_nregs -= 1;
8771 cum->sse_regno += 1;
8772 if (cum->sse_nregs <= 0)
8786 if (!type || !AGGREGATE_TYPE_P (type))
8788 cum->mmx_words += words;
8789 cum->mmx_nregs -= 1;
8790 cum->mmx_regno += 1;
8791 if (cum->mmx_nregs <= 0)
8801 cum->float_in_sse = 0;
8802 error ("calling %qD with SSE calling convention without "
8803 "SSE/SSE2 enabled", cum->decl);
8804 sorry ("this is a GCC bug that can be worked around by adding "
8805 "attribute used to function called");
/* NOTE(review): elided excerpt -- comments only, code untouched.
   Advances CUM past one SysV x86-64 argument: register counters when it
   fits, otherwise aligned stack words.  */
8812 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
8813 const_tree type, HOST_WIDE_INT words, bool named)
8815 int int_nregs, sse_nregs;
8817 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
8818 if (!named && (VALID_AVX512F_REG_MODE (mode)
8819 || VALID_AVX256_REG_MODE (mode)))
/* Fits in registers: consume the computed int/SSE register counts.  */
8822 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
8823 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
8825 cum->nregs -= int_nregs;
8826 cum->sse_nregs -= sse_nregs;
8827 cum->regno += int_nregs;
8828 cum->sse_regno += sse_nregs;
/* Memory path: round the running word count up to the arg boundary.  */
8833 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
8834 cum->words = ROUND_UP (cum->words, align);
8835 cum->words += words;
/* NOTE(review): elided excerpt -- comments only, code untouched.
   Advances CUM past one Microsoft x64 ABI argument; only 1/2/4/8-byte
   values are passed directly, everything else goes indirect.  */
8841 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
8842 HOST_WIDE_INT words)
8844 /* Otherwise, this should be passed indirect. */
8845 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
8847 cum->words += words;
8857 /* Update the data in CUM to advance over an argument of mode MODE and
8858 data type TYPE. (TYPE is null for libcalls where that information
8859 may not be available.) */
/* NOTE(review): elided excerpt -- comments only, code untouched.
   TARGET_FUNCTION_ARG_ADVANCE hook: dispatches to the 32-bit, SysV-64 or
   MS-64 advance helper, plus MPX pointer-bounds bookkeeping.  */
8862 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
8863 const_tree type, bool named)
8865 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8866 HOST_WIDE_INT bytes, words;
8869 if (mode == BLKmode)
8870 bytes = int_size_in_bytes (type);
8872 bytes = GET_MODE_SIZE (mode);
8873 words = CEIL (bytes, UNITS_PER_WORD);
8876 mode = type_natural_mode (type, NULL, false);
/* MPX bounds arguments are accounted separately from ordinary args.  */
8878 if ((type && POINTER_BOUNDS_TYPE_P (type))
8879 || POINTER_BOUNDS_MODE_P (mode))
8881 /* If we pass bounds in BT then just update remained bounds count. */
8882 if (cum->bnds_in_bt)
8888 /* Update remained number of bounds to force. */
8889 if (cum->force_bnd_pass)
8890 cum->force_bnd_pass--;
8897 /* The first arg not going to Bounds Tables resets this counter. */
8898 cum->bnds_in_bt = 0;
8899 /* For unnamed args we always pass bounds to avoid bounds mess when
8900 passed and received types do not match. If bounds do not follow
8901 unnamed arg, still pretend required number of bounds were passed. */
8902 if (cum->force_bnd_pass)
8904 cum->bnd_regno += cum->force_bnd_pass;
8905 cum->force_bnd_pass = 0;
8910 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8912 if (call_abi == MS_ABI)
8913 nregs = function_arg_advance_ms_64 (cum, bytes, words);
8915 nregs = function_arg_advance_64 (cum, mode, type, words, named);
8918 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
8920 /* For stdarg we expect bounds to be passed for each value passed
8923 cum->force_bnd_pass = nregs;
8924 /* For pointers passed in memory we expect bounds passed in Bounds
8927 cum->bnds_in_bt = chkp_type_bounds_count (type);
8930 /* Define where to put the arguments to a function.
8931 Value is zero to push the argument on the stack,
8932 or a hard register in which to store the argument.
8934 MODE is the argument's machine mode.
8935 TYPE is the data type of the argument (as a tree).
8936 This is null for libcalls where that information may
8938 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8939 the preceding args and about the function being called.
8940 NAMED is nonzero if this argument is a named parameter
8941 (otherwise it is an extra parameter matching an ellipsis). */
/* NOTE(review): elided excerpt -- comments only, code untouched.
   Returns the RTL location of one 32-bit-ABI argument (integer, SSE or MMX
   register), or presumably NULL for the stack -- the returns for the
   fall-through paths are elided; verify against the full source.  */
8944 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8945 machine_mode orig_mode, const_tree type,
8946 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
8948 bool error_p = false;
8949 /* Avoid the AL settings for the Unix64 ABI. */
8950 if (mode == VOIDmode)
8955 /* Intel MCU psABI passes scalars and aggregates no larger than 8
8956 bytes in registers. */
8957 if (!VECTOR_MODE_P (mode) && bytes <= 8)
8976 if (words <= cum->nregs)
8978 int regno = cum->regno;
8980 /* Fastcall allocates the first two DWORD (SImode) or
8981 smaller arguments to ECX and EDX if it isn't an
8987 || (type && AGGREGATE_TYPE_P (type)))
8990 /* ECX not EAX is the first allocated register. */
8991 if (regno == AX_REG)
8994 return gen_rtx_REG (mode, regno);
/* float_in_sse == -1 flags an SSE calling convention without SSE;
   reported by the error branch at the end.  */
8999 if (cum->float_in_sse == -1)
9001 if (cum->float_in_sse < 2)
9004 if (cum->float_in_sse == -1)
9006 if (cum->float_in_sse < 1)
9010 /* In 32bit, we pass TImode in xmm registers. */
9017 if (!type || !AGGREGATE_TYPE_P (type))
9020 return gen_reg_or_parallel (mode, orig_mode,
9021 cum->sse_regno + FIRST_SSE_REG);
9027 /* OImode and XImode shouldn't be used directly. */
9042 if (!type || !AGGREGATE_TYPE_P (type))
9045 return gen_reg_or_parallel (mode, orig_mode,
9046 cum->sse_regno + FIRST_SSE_REG);
9056 if (!type || !AGGREGATE_TYPE_P (type))
9059 return gen_reg_or_parallel (mode, orig_mode,
9060 cum->mmx_regno + FIRST_MMX_REG);
9066 cum->float_in_sse = 0;
9067 error ("calling %qD with SSE calling convention without "
9068 "SSE/SSE2 enabled", cum->decl);
9069 sorry ("this is a GCC bug that can be worked around by adding "
9070 "attribute used to function called");
/* NOTE(review): elided excerpt -- comments only, code untouched.
   Returns the RTL location of one SysV x86-64 argument via
   construct_container; VOIDmode encodes the hidden %al vararg count.  */
9077 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
9078 machine_mode orig_mode, const_tree type, bool named)
9080 /* Handle a hidden AL argument containing number of registers
9081 for varargs x86-64 functions. */
9082 if (mode == VOIDmode)
9083 return GEN_INT (cum->maybe_vaarg
9084 ? (cum->sse_nregs < 0
9085 ? X86_64_SSE_REGPARM_MAX
9106 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
9112 return construct_container (mode, orig_mode, type, 0, cum->nregs,
9114 &x86_64_int_parameter_registers [cum->regno],
/* NOTE(review): elided excerpt -- comments only, code untouched.
   Returns the RTL location of one Microsoft x64 ABI argument; integer
   slots and SSE slots share one positional sequence.  */
9119 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
9120 machine_mode orig_mode, bool named,
9121 HOST_WIDE_INT bytes)
9125 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
9126 We use value of -2 to specify that current function call is MSABI. */
9127 if (mode == VOIDmode)
9128 return GEN_INT (-2);
9130 /* If we've run out of registers, it goes on the stack. */
9131 if (cum->nregs == 0)
9134 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
9136 /* Only floating point modes are passed in anything but integer regs. */
9137 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
9140 regno = cum->regno + FIRST_SSE_REG;
9145 /* Unnamed floating parameters are passed in both the
9146 SSE and integer registers. */
/* Build a two-entry PARALLEL so the value lands in both registers.  */
9147 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
9148 t2 = gen_rtx_REG (mode, regno);
9149 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
9150 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
9151 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
9154 /* Handle aggregated types passed in register. */
9155 if (orig_mode == BLKmode)
9157 if (bytes > 0 && bytes <= 8)
9158 mode = (bytes > 4 ? DImode : SImode);
9159 if (mode == BLKmode)
9163 return gen_reg_or_parallel (mode, orig_mode, regno);
9166 /* Return where to put the arguments to a function.
9167 Return zero to push the argument on the stack, or a hard register in which to store the argument.
9169 MODE is the argument's machine mode. TYPE is the data type of the
9170 argument. It is null for libcalls where that information may not be
9171 available. CUM gives information about the preceding args and about
9172 the function being called. NAMED is nonzero if this argument is a
9173 named parameter (otherwise it is an extra parameter matching an
/* NOTE(review): elided excerpt -- comments only, code untouched.
   TARGET_FUNCTION_ARG hook: handles MPX bounds specially, then dispatches
   to the MS-64, SysV-64 or 32-bit helper.  */
9177 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
9178 const_tree type, bool named)
9180 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9181 machine_mode mode = omode;
9182 HOST_WIDE_INT bytes, words;
9185 /* All pointer bounds arguments are handled separately here. */
9186 if ((type && POINTER_BOUNDS_TYPE_P (type))
9187 || POINTER_BOUNDS_MODE_P (mode))
9189 /* Return NULL if bounds are forced to go in Bounds Table. */
9190 if (cum->bnds_in_bt)
9192 /* Return the next available bound reg if any. */
9193 else if (cum->bnd_regno <= LAST_BND_REG)
9194 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
9195 /* Return the next special slot number otherwise. */
9197 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
9202 if (mode == BLKmode)
9203 bytes = int_size_in_bytes (type);
9205 bytes = GET_MODE_SIZE (mode);
9206 words = CEIL (bytes, UNITS_PER_WORD);
9208 /* To simplify the code below, represent vector types with a vector mode
9209 even if MMX/SSE are not active. */
9210 if (type && TREE_CODE (type) == VECTOR_TYPE)
9211 mode = type_natural_mode (type, cum, false);
9215 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
9217 if (call_abi == MS_ABI)
9218 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
9220 arg = function_arg_64 (cum, mode, omode, type, named);
9223 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
9228 /* A C expression that indicates when an argument must be passed by
9229 reference. If nonzero for an argument, a copy of that argument is
9230 made in memory and a pointer to the argument is passed instead of
9231 the argument itself. The pointer is passed in whatever way is
9232 appropriate for passing a pointer to that type. */
/* NOTE(review): elided excerpt -- comments only, code untouched.
   TARGET_PASS_BY_REFERENCE hook: true when the argument must be passed as
   a pointer to a caller-made copy.  */
9235 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
9236 const_tree type, bool)
9238 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9240 /* Bounds are never passed by reference. */
9241 if ((type && POINTER_BOUNDS_TYPE_P (type))
9242 || POINTER_BOUNDS_MODE_P (mode))
9247 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
9249 /* See Windows x64 Software Convention. */
9250 if (call_abi == MS_ABI)
9252 HOST_WIDE_INT msize = GET_MODE_SIZE (mode);
9256 /* Arrays are passed by reference. */
9257 if (TREE_CODE (type) == ARRAY_TYPE)
9260 if (RECORD_OR_UNION_TYPE_P (type))
9262 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
9263 are passed by reference. */
9264 msize = int_size_in_bytes (type);
9268 /* __m128 is passed by reference. */
/* MS x64: only power-of-two sizes up to 8 bytes go by value.  */
9269 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
/* SysV: variable-sized types (size -1) are passed by reference.  */
9271 else if (type && int_size_in_bytes (type) == -1)
9278 /* Return true when TYPE should be 128bit aligned for 32bit argument
9279 passing ABI. XXX: This function is obsolete and is only used for
9280 checking psABI compatibility with previous versions of GCC. */
/* NOTE(review): elided excerpt -- comments only, code untouched.
   Legacy (pre-GCC-4.6) predicate: does TYPE require 128-bit alignment for
   32-bit argument passing?  Kept only for -Wpsabi compatibility notes.  */
9283 ix86_compat_aligned_value_p (const_tree type)
9285 machine_mode mode = TYPE_MODE (type);
9286 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
9290 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
9292 if (TYPE_ALIGN (type) < 128)
9295 if (AGGREGATE_TYPE_P (type))
9297 /* Walk the aggregates recursively. */
9298 switch (TREE_CODE (type))
9302 case QUAL_UNION_TYPE:
9306 /* Walk all the structure fields. */
/* Any 128-bit-aligned field makes the whole aggregate qualify.  */
9307 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
9309 if (TREE_CODE (field) == FIELD_DECL
9310 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
9317 /* Just for use if some languages passes arrays by value. */
9318 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
9329 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
9330 XXX: This function is obsolete and is only used for checking psABI
9331 compatibility with previous versions of GCC. */
/* NOTE(review): elided excerpt -- comments only, code untouched.
   Legacy alignment computation used only to detect and warn about the
   GCC 4.6 psABI alignment change (see caller ix86_function_arg_boundary).  */
9334 ix86_compat_function_arg_boundary (machine_mode mode,
9335 const_tree type, unsigned int align)
9337 /* In 32bit, only _Decimal128 and __float128 are aligned to their
9338 natural boundaries. */
9339 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
9341 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
9342 make an exception for SSE modes since these require 128bit
9345 The handling here differs from field_alignment. ICC aligns MMX
9346 arguments to 4 byte boundaries, while structure fields are aligned
9347 to 8 byte boundaries. */
9350 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
9351 align = PARM_BOUNDARY;
9355 if (!ix86_compat_aligned_value_p (type))
9356 align = PARM_BOUNDARY;
/* Clamp to the platform's maximum supported alignment.  */
9359 if (align > BIGGEST_ALIGNMENT)
9360 align = BIGGEST_ALIGNMENT;
9364 /* Return true when TYPE should be 128bit aligned for 32bit argument
/* NOTE(review): elided excerpt -- comments only, code untouched.
   Predicate: TYPE is, or recursively contains, a value whose alignment is
   at least 128 bits (XFmode/XCmode excluded up front).  */
9368 ix86_contains_aligned_value_p (const_tree type)
9370 machine_mode mode = TYPE_MODE (type);
9372 if (mode == XFmode || mode == XCmode)
9375 if (TYPE_ALIGN (type) < 128)
9378 if (AGGREGATE_TYPE_P (type))
9380 /* Walk the aggregates recursively. */
9381 switch (TREE_CODE (type))
9385 case QUAL_UNION_TYPE:
9389 /* Walk all the structure fields. */
9390 for (field = TYPE_FIELDS (type);
9392 field = DECL_CHAIN (field))
9394 if (TREE_CODE (field) == FIELD_DECL
9395 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
9402 /* Just for use if some languages passes arrays by value. */
9403 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
/* Non-aggregate: decide directly from the type's own alignment.  */
9412 return TYPE_ALIGN (type) >= 128;
9417 /* Gives the alignment boundary, in bits, of an argument with the
9418 specified mode and type. */
/* NOTE(review): elided excerpt -- comments only, code untouched.
   TARGET_FUNCTION_ARG_BOUNDARY hook: alignment in bits for an argument of
   MODE/TYPE; warns under -Wpsabi when the answer changed in GCC 4.6.  */
9421 ix86_function_arg_boundary (machine_mode mode, const_tree type)
9426 /* Since the main variant type is used for call, we convert it to
9427 the main variant type. */
9428 type = TYPE_MAIN_VARIANT (type);
9429 align = TYPE_ALIGN (type);
9432 align = GET_MODE_ALIGNMENT (mode);
9433 if (align < PARM_BOUNDARY)
9434 align = PARM_BOUNDARY;
/* Remember the natural alignment so the psABI-change warning below can
   compare against the legacy computation.  */
9438 unsigned int saved_align = align;
9442 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
9445 if (mode == XFmode || mode == XCmode)
9446 align = PARM_BOUNDARY;
9448 else if (!ix86_contains_aligned_value_p (type))
9449 align = PARM_BOUNDARY;
9452 align = PARM_BOUNDARY;
9457 && align != ix86_compat_function_arg_boundary (mode, type,
9461 inform (input_location,
9462 "The ABI for passing parameters with %d-byte"
9463 " alignment has changed in GCC 4.6",
9464 align / BITS_PER_UNIT);
9471 /* Return true if N is a possible register number of function value. */
/* NOTE(review): elided excerpt -- the switch skeleton and case labels are
   missing from view; comments only, code untouched.  Predicate: can REGNO
   hold (part of) a function return value under the current ABI?  */
9474 ix86_function_value_regno_p (const unsigned int regno)
9481 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
9484 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
/* Bound registers are return slots only for MPX-instrumented functions.  */
9488 return chkp_function_instrumented_p (current_function_decl);
9490 /* Complex values are returned in %st(0)/%st(1) pair. */
9493 /* TODO: The function should depend on current function ABI but
9494 builtins.c would need updating then. Therefore we use the
9496 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
9498 return TARGET_FLOAT_RETURNS_IN_80387;
9500 /* Complex values are returned in %xmm0/%xmm1 pair. */
9506 if (TARGET_MACHO || TARGET_64BIT)
9514 /* Define how to find the value returned by a function.
9515 VALTYPE is the data type of the value (as a tree).
9516 If the precise function being called is known, FUNC is its FUNCTION_DECL;
9517 otherwise, FUNC is 0. */
/* NOTE(review): elided excerpt -- comments only, code untouched.
   Picks the return register for the 32-bit ABI (MMX/SSE vector regs, x87
   %st(0), or %eax) and builds the REG rtx.  */
9520 function_value_32 (machine_mode orig_mode, machine_mode mode,
9521 const_tree fntype, const_tree fn)
9525 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
9526 we normally prevent this case when mmx is not available. However
9527 some ABIs may require the result to be returned like DImode. */
9528 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
9529 regno = FIRST_MMX_REG;
9531 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
9532 we prevent this case when sse is not available. However some ABIs
9533 may require the result to be returned like integer TImode. */
9534 else if (mode == TImode
9535 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
9536 regno = FIRST_SSE_REG;
9538 /* 32-byte vector modes in %ymm0. */
9539 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
9540 regno = FIRST_SSE_REG;
9542 /* 64-byte vector modes in %zmm0. */
9543 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
9544 regno = FIRST_SSE_REG;
9546 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
9547 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
9548 regno = FIRST_FLOAT_REG;
9550 /* Most things go in %eax. */
9553 /* Override FP return register with %xmm0 for local functions when
9554 SSE math is enabled or for functions with sseregparm attribute. */
9555 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
9557 int sse_level = ix86_function_sseregparm (fntype, fn, false);
9558 if (sse_level == -1)
/* NOTE(review): user-visible typo "caling" in this diagnostic; it is a
   runtime string so it is left untouched here -- fix in a code change.  */
9560 error ("calling %qD with SSE caling convention without "
9561 "SSE/SSE2 enabled", fn);
9562 sorry ("this is a GCC bug that can be worked around by adding "
9563 "attribute used to function called");
9565 else if ((sse_level >= 1 && mode == SFmode)
9566 || (sse_level == 2 && mode == DFmode))
9567 regno = FIRST_SSE_REG;
9570 /* OImode shouldn't be used directly. */
9571 gcc_assert (mode != OImode);
9573 return gen_rtx_REG (orig_mode, regno);
/* NOTE(review): elided excerpt -- comments only, code untouched.
   Picks the SysV x86-64 return location: special-cased libcalls and
   pointers, otherwise the general construct_container machinery.  */
9577 function_value_64 (machine_mode orig_mode, machine_mode mode,
9582 /* Handle libcalls, which don't provide a type node. */
9583 if (valtype == NULL)
9597 regno = FIRST_SSE_REG;
9601 regno = FIRST_FLOAT_REG;
9609 return gen_rtx_REG (mode, regno);
9611 else if (POINTER_TYPE_P (valtype))
9613 /* Pointers are always returned in word_mode. */
/* in_return=1 and the return-register tables, not the parameter ones.  */
9617 ret = construct_container (mode, orig_mode, valtype, 1,
9618 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
9619 x86_64_int_return_registers, 0);
9621 /* For zero sized structures, construct_container returns NULL, but we
9622 need to keep rest of compiler happy by returning meaningful value. */
9624 ret = gen_rtx_REG (orig_mode, AX_REG);
/* NOTE(review): elided excerpt -- comments only, code untouched.
   Picks the Microsoft x64 return register: %rax by default, %xmm0 for
   floats and vector-ish values.  */
9630 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
9633 unsigned int regno = AX_REG;
9637 switch (GET_MODE_SIZE (mode))
9640 if (valtype != NULL_TREE
/* NOTE(review): the next two conditions are identical --
   !VECTOR_INTEGER_TYPE_P is tested twice; looks like one of them was
   meant to be a different predicate.  Flagged only; verify upstream.  */
9641 && !VECTOR_INTEGER_TYPE_P (valtype)
9642 && !VECTOR_INTEGER_TYPE_P (valtype)
9643 && !INTEGRAL_TYPE_P (valtype)
9644 && !VECTOR_FLOAT_TYPE_P (valtype))
9646 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9647 && !COMPLEX_MODE_P (mode))
9648 regno = FIRST_SSE_REG;
9652 if (mode == SFmode || mode == DFmode)
9653 regno = FIRST_SSE_REG;
9659 return gen_rtx_REG (orig_mode, regno);
/* NOTE(review): elided excerpt -- comments only, code untouched.
   Common worker for the function-value hooks: normalizes the decl/type
   argument, then dispatches by ABI (MPX bounds, MS-64, SysV-64, 32-bit).  */
9663 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
9664 machine_mode orig_mode, machine_mode mode)
9666 const_tree fn, fntype;
/* FNTYPE_OR_DECL may be either a FUNCTION_DECL or a FUNCTION_TYPE.  */
9669 if (fntype_or_decl && DECL_P (fntype_or_decl))
9670 fn = fntype_or_decl;
9671 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
9673 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
9674 || POINTER_BOUNDS_MODE_P (mode))
9675 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
9676 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
9677 return function_value_ms_64 (orig_mode, mode, valtype);
9678 else if (TARGET_64BIT)
9679 return function_value_64 (orig_mode, mode, valtype);
9681 return function_value_32 (orig_mode, mode, fntype, fn);
/* Implement TARGET_FUNCTION_VALUE: compute the natural (classified) mode
   for VALTYPE, then defer to ix86_function_value_1.  The unnamed bool
   (outgoing) parameter is unused here.  */
9685 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
9687   machine_mode mode, orig_mode;
9689   orig_mode = TYPE_MODE (valtype);
9690   mode = type_natural_mode (valtype, NULL, true);
9691   return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
9694 /* Return an RTX representing a place where a function returns
9695 or receives pointer bounds or NULL if no bounds are returned.
9697 VALTYPE is a data type of a value returned by the function.
9699 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
9700 or FUNCTION_TYPE of the function.
9702 If OUTGOING is false, return a place in which the caller will
9703 see the return value. Otherwise, return a place where a
9704 function returns a value. */
9707 ix86_function_value_bounds (const_tree valtype,
9708 			    const_tree fntype_or_decl ATTRIBUTE_UNUSED,
9709 			    bool outgoing ATTRIBUTE_UNUSED)
/* A bounded type returns its bounds in the first bound register.  */
9713   if (BOUNDED_TYPE_P (valtype))
9714     res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
9715   else if (chkp_type_has_pointer (valtype))
/* Aggregate containing pointers: collect the pointer slots and pair each
   with a bound register; at most two bound registers are available.  */
9720       unsigned i, bnd_no = 0;
9722       bitmap_obstack_initialize (NULL);
9723       slots = BITMAP_ALLOC (NULL);
9724       chkp_find_bound_slots (valtype, slots);
9726       EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
9728 	  rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
9729 	  rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
9730 	  gcc_assert (bnd_no < 2);
9731 	  bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
9734       res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
9736       BITMAP_FREE (slots);
9737       bitmap_obstack_release (NULL);
9745 /* Pointer function arguments and return values are promoted to
/* Implement TARGET_PROMOTE_FUNCTION_MODE: pointers are zero/sign-extended
   per POINTERS_EXTEND_UNSIGNED; everything else uses the default hook.  */
9749 ix86_promote_function_mode (const_tree type, machine_mode mode,
9750 			    int *punsignedp, const_tree fntype,
9753   if (type != NULL_TREE && POINTER_TYPE_P (type))
9755       *punsignedp = POINTERS_EXTEND_UNSIGNED;
9758   return default_promote_function_mode (type, mode, punsignedp, fntype,
9762 /* Return true if a structure, union or array with MODE containing FIELD
9763    should be accessed using BLKmode.  */
9766 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
9768   /* Union with XFmode must be in BLKmode.  */
9769   return (mode == XFmode
9770 	  && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
9771 	      || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
/* Implement TARGET_LIBCALL_VALUE: like ix86_function_value but with no
   type or decl information — MODE serves as both original and natural
   mode.  */
9775 ix86_libcall_value (machine_mode mode)
9777   return ix86_function_value_1 (NULL, NULL, mode, mode);
9780 /* Return true iff type is returned in memory.  */
9783 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9785 #ifdef SUBTARGET_RETURN_IN_MEMORY
9786   return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
9788   const machine_mode mode = type_natural_mode (type, NULL, true);
/* Pointer bounds are never returned in memory (they use bound regs).  */
9791   if (POINTER_BOUNDS_TYPE_P (type))
9796   if (ix86_function_type_abi (fntype) == MS_ABI)
9798       size = int_size_in_bytes (type);
9800       /* __m128 is returned in xmm0.  */
9801       if ((!type || VECTOR_INTEGER_TYPE_P (type)
9802 	   || INTEGRAL_TYPE_P (type)
9803 	   || VECTOR_FLOAT_TYPE_P (type))
9804 	  && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9805 	  && !COMPLEX_MODE_P (mode)
9806 	  && (GET_MODE_SIZE (mode) == 16 || size == 16))
9809       /* Otherwise, the size must be exactly in [1248].  */
9810       return size != 1 && size != 2 && size != 4 && size != 8;
/* SysV 64-bit: in memory iff the psABI classification needs no regs.  */
9814       int needed_intregs, needed_sseregs;
9816       return examine_argument (mode, type, 1,
9817 			       &needed_intregs, &needed_sseregs);
9822       size = int_size_in_bytes (type);
9824       /* Intel MCU psABI returns scalars and aggregates no larger than 8
9825 	 bytes in registers.  */
9827 	return VECTOR_MODE_P (mode) || size < 0 || size > 8;
9829       if (mode == BLKmode)
9832       if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
9835       if (VECTOR_MODE_P (mode) || mode == TImode)
9837 	  /* User-created vectors small enough to fit in EAX.  */
9841 	  /* Unless ABI prescribes otherwise,
9842 	     MMX/3dNow values are returned in MM0 if available.  */
9845 	    return TARGET_VECT8_RETURNS || !TARGET_MMX;
9847 	  /* SSE values are returned in XMM0 if available.  */
9851 	  /* AVX values are returned in YMM0 if available.  */
9855 	  /* AVX512F values are returned in ZMM0 if available.  */
9857 	    return !TARGET_AVX512F;
9866       /* OImode shouldn't be used directly.  */
9867       gcc_assert (mode != OImode);
9875 /* Create the va_list data type.  */
/* Build the SysV 64-bit __builtin_va_list: a one-element array of a
   record { unsigned gp_offset; unsigned fp_offset;
	    void *overflow_arg_area; void *reg_save_area; }.  */
9878 ix86_build_builtin_va_list_64 (void)
9880   tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
9882   record = lang_hooks.types.make_type (RECORD_TYPE);
9883   type_decl = build_decl (BUILTINS_LOCATION,
9884 			  TYPE_DECL, get_identifier ("__va_list_tag"), record);
9886   f_gpr = build_decl (BUILTINS_LOCATION,
9887 		      FIELD_DECL, get_identifier ("gp_offset"),
9888 		      unsigned_type_node);
9889   f_fpr = build_decl (BUILTINS_LOCATION,
9890 		      FIELD_DECL, get_identifier ("fp_offset"),
9891 		      unsigned_type_node);
9892   f_ovf = build_decl (BUILTINS_LOCATION,
9893 		      FIELD_DECL, get_identifier ("overflow_arg_area"),
9895   f_sav = build_decl (BUILTINS_LOCATION,
9896 		      FIELD_DECL, get_identifier ("reg_save_area"),
/* Remember the counter fields so the stdarg pass can track their use.  */
9899   va_list_gpr_counter_field = f_gpr;
9900   va_list_fpr_counter_field = f_fpr;
9902   DECL_FIELD_CONTEXT (f_gpr) = record;
9903   DECL_FIELD_CONTEXT (f_fpr) = record;
9904   DECL_FIELD_CONTEXT (f_ovf) = record;
9905   DECL_FIELD_CONTEXT (f_sav) = record;
9907   TYPE_STUB_DECL (record) = type_decl;
9908   TYPE_NAME (record) = type_decl;
9909   TYPE_FIELDS (record) = f_gpr;
9910   DECL_CHAIN (f_gpr) = f_fpr;
9911   DECL_CHAIN (f_fpr) = f_ovf;
9912   DECL_CHAIN (f_ovf) = f_sav;
9914   layout_type (record);
9916   /* The correct type is an array type of one element.  */
9917   return build_array_type (record, build_index_type (size_zero_node));
9920 /* Setup the builtin va_list data type and for 64-bit the additional
9921    calling convention specific va_list data types.  */
9924 ix86_build_builtin_va_list (void)
9928   /* Initialize ABI specific va_list builtin types.  */
9929       tree sysv_va_list, ms_va_list;
9931       sysv_va_list = ix86_build_builtin_va_list_64 ();
9932       sysv_va_list_type_node = build_variant_type_copy (sysv_va_list);
9934       /* For MS_ABI we use plain pointer to argument area.  */
9935       ms_va_list = build_pointer_type (char_type_node);
9936       ms_va_list_type_node = build_variant_type_copy (ms_va_list);
/* The default va_list follows the default ABI for this compilation.  */
9938       return (ix86_abi == MS_ABI) ? ms_va_list : sysv_va_list;
9942   /* For i386 we use plain pointer to argument area.  */
9943   return build_pointer_type (char_type_node);
9947 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */
/* SysV 64-bit: spill the unused named-parameter registers into the
   register save area so va_arg can find them.  GPRs are stored
   unconditionally; SSE registers only if %al was non-zero at entry.  */
9950 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
9956   /* GPR size of varargs save area.  */
9957   if (cfun->va_list_gpr_size)
9958     ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
9960     ix86_varargs_gpr_size = 0;
9962   /* FPR size of varargs save area.  We don't need it if we don't pass
9963      anything in SSE registers.  */
9964   if (TARGET_SSE && cfun->va_list_fpr_size)
9965     ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
9967     ix86_varargs_fpr_size = 0;
9969   if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
9972   save_area = frame_pointer_rtx;
9973   set = get_varargs_alias_set ();
9975   max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
9976   if (max > X86_64_REGPARM_MAX)
9977     max = X86_64_REGPARM_MAX;
/* Store each remaining integer parameter register into the save area.  */
9979   for (i = cum->regno; i < max; i++)
9981       mem = gen_rtx_MEM (word_mode,
9982 			 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
9983       MEM_NOTRAP_P (mem) = 1;
9984       set_mem_alias_set (mem, set);
9985       emit_move_insn (mem,
9986 		      gen_rtx_REG (word_mode,
9987 				   x86_64_int_parameter_registers[i]));
9990   if (ix86_varargs_fpr_size)
9993       rtx_code_label *label;
9996       /* Now emit code to save SSE registers.  The AX parameter contains number
9997 	 of SSE parameter registers used to call this function, though all we
9998 	 actually check here is the zero/non-zero status.  */
10000       label = gen_label_rtx ();
10001       test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
10002       emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
10005       /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
10006 	 we used movdqa (i.e. TImode) instead?  Perhaps even better would
10007 	 be if we could determine the real mode of the data, via a hook
10008 	 into pass_stdarg.  Ignore all that for now.  */
10010       if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
10011 	crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
10013       max = cum->sse_regno + cfun->va_list_fpr_size / 16;
10014       if (max > X86_64_SSE_REGPARM_MAX)
10015 	max = X86_64_SSE_REGPARM_MAX;
10017       for (i = cum->sse_regno; i < max; ++i)
10019 	  mem = plus_constant (Pmode, save_area,
10020 			       i * 16 + ix86_varargs_gpr_size);
10021 	  mem = gen_rtx_MEM (smode, mem);
10022 	  MEM_NOTRAP_P (mem) = 1;
10023 	  set_mem_alias_set (mem, set);
10024 	  set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
10026 	  emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
10029       emit_label (label);
/* MS 64-bit variant of TARGET_SETUP_INCOMING_VARARGS: the caller
   reserves home slots above the return address, so simply store the
   remaining register parameters back into those slots.  */
10034 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
10036   alias_set_type set = get_varargs_alias_set ();
10039   /* Reset to zero, as there might be a sysv vaarg used
10041   ix86_varargs_gpr_size = 0;
10042   ix86_varargs_fpr_size = 0;
10044   for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
10048       mem = gen_rtx_MEM (Pmode,
10049 			 plus_constant (Pmode, virtual_incoming_args_rtx,
10050 					i * UNITS_PER_WORD));
10051       MEM_NOTRAP_P (mem) = 1;
10052       set_mem_alias_set (mem, set);
10054       reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
10055       emit_move_insn (mem, reg);
/* Implement TARGET_SETUP_INCOMING_VARARGS: advance past the last named
   argument, then dispatch to the MS or SysV 64-bit worker.  */
10060 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
10061 			     tree type, int *, int no_rtl)
10063   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10064   CUMULATIVE_ARGS next_cum;
10067   /* This argument doesn't appear to be used anymore.  Which is good,
10068      because the old code here didn't suppress rtl generation.  */
10069   gcc_assert (!no_rtl);
10074   fntype = TREE_TYPE (current_function_decl);
10076   /* For varargs, we do not want to skip the dummy va_dcl argument.
10077      For stdargs, we do want to skip the last named argument.  */
10079   if (stdarg_p (fntype))
10080     ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
10083   if (cum->call_abi == MS_ABI)
10084     setup_incoming_varargs_ms_64 (&next_cum);
10086     setup_incoming_varargs_64 (&next_cum);
/* MPX counterpart of ix86_setup_incoming_varargs: store bounds for the
   GPR varargs save area.  Bounds past the available bound registers are
   reloaded via bndldx from the caller's bounds table.  */
10090 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
10091 				   enum machine_mode mode,
10093 				   int *pretend_size ATTRIBUTE_UNUSED,
10096   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10097   CUMULATIVE_ARGS next_cum;
10100   int bnd_reg, i, max;
10102   gcc_assert (!no_rtl);
10104   /* Do nothing if we use plain pointer to argument area.  */
10105   if (!TARGET_64BIT || cum->call_abi == MS_ABI)
10108   fntype = TREE_TYPE (current_function_decl);
10110   /* For varargs, we do not want to skip the dummy va_dcl argument.
10111      For stdargs, we do want to skip the last named argument.  */
10113   if (stdarg_p (fntype))
10114     ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
10116   save_area = frame_pointer_rtx;
10118   max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
10119   if (max > X86_64_REGPARM_MAX)
10120     max = X86_64_REGPARM_MAX;
10122   bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
10123   if (chkp_function_instrumented_p (current_function_decl))
10124     for (i = cum->regno; i < max; i++)
10126 	rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
10127 	rtx ptr = gen_rtx_REG (Pmode,
10128 			       x86_64_int_parameter_registers[i]);
/* Bounds still in a register can be stored directly; otherwise load
   them from the bounds table slot associated with the argument.  */
10131 	if (bnd_reg <= LAST_BND_REG)
10132 	  bounds = gen_rtx_REG (BNDmode, bnd_reg);
10136 	      plus_constant (Pmode, arg_pointer_rtx,
10137 			     (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
10138 	    bounds = gen_reg_rtx (BNDmode);
10139 	    emit_insn (BNDmode == BND64mode
10140 		       ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
10141 		       : gen_bnd32_ldx (bounds, ldx_addr, ptr));
10144 	emit_insn (BNDmode == BND64mode
10145 		   ? gen_bnd64_stx (addr, ptr, bounds)
10146 		   : gen_bnd32_stx (addr, ptr, bounds));
10153 /* Checks if TYPE is of kind va_list char *.  */
10156 is_va_list_char_pointer (tree type)
10160   /* For 32-bit it is always true.  */
/* On 64-bit, true only for the MS char-pointer flavor of va_list.  */
10163   canonic = ix86_canonical_va_list_type (type);
10164   return (canonic == ms_va_list_type_node
10165 	  || (ix86_abi == MS_ABI && canonic == va_list_type_node));
10168 /* Implement va_start.  */
10171 ix86_va_start (tree valist, rtx nextarg)
10173   HOST_WIDE_INT words, n_gpr, n_fpr;
10174   tree f_gpr, f_fpr, f_ovf, f_sav;
10175   tree gpr, fpr, ovf, sav, t;
10179   if (flag_split_stack
10180       && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
10182       unsigned int scratch_regno;
10184       /* When we are splitting the stack, we can't refer to the stack
10185 	 arguments using internal_arg_pointer, because they may be on
10186 	 the old stack.  The split stack prologue will arrange to
10187 	 leave a pointer to the old stack arguments in a scratch
10188 	 register, which we here copy to a pseudo-register.  The split
10189 	 stack prologue can't set the pseudo-register directly because
10190 	 it (the prologue) runs before any registers have been saved.  */
10192       scratch_regno = split_stack_prologue_scratch_regno ();
10193       if (scratch_regno != INVALID_REGNUM)
10198 	  reg = gen_reg_rtx (Pmode);
10199 	  cfun->machine->split_stack_varargs_pointer = reg;
10202 	  emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
10203 	  seq = get_insns ();
/* Emit the copy at the very start of the function, after the prologue
   scratch register has been set.  */
10206 	  push_topmost_sequence ();
10207 	  emit_insn_after (seq, entry_of_function ());
10208 	  pop_topmost_sequence ();
10212   /* Only 64bit target needs something special.  */
10213   if (is_va_list_char_pointer (TREE_TYPE (valist)))
10215       if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
10216 	std_expand_builtin_va_start (valist, nextarg);
/* Split-stack char-pointer va_list: point it at the old-stack args.  */
10221 	  va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
10222 	  next = expand_binop (ptr_mode, add_optab,
10223 			       cfun->machine->split_stack_varargs_pointer,
10224 			       crtl->args.arg_offset_rtx,
10225 			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
10226 	  convert_move (va_r, next, 0);
10228 	  /* Store zero bounds for va_list.  */
10229 	  if (chkp_function_instrumented_p (current_function_decl))
10230 	    chkp_expand_bounds_reset_for_mem (valist,
10231 					      make_tree (TREE_TYPE (valist),
/* SysV 64-bit va_list record: initialize its four fields.  */
10238   f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
10239   f_fpr = DECL_CHAIN (f_gpr);
10240   f_ovf = DECL_CHAIN (f_fpr);
10241   f_sav = DECL_CHAIN (f_ovf);
10243   valist = build_simple_mem_ref (valist);
10244   TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
10245   /* The following should be folded into the MEM_REF offset.  */
10246   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
10248   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
10250   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
10252   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
10255   /* Count number of gp and fp argument registers used.  */
10256   words = crtl->args.info.words;
10257   n_gpr = crtl->args.info.regno;
10258   n_fpr = crtl->args.info.sse_regno;
10260   if (cfun->va_list_gpr_size)
10262       type = TREE_TYPE (gpr);
10263       t = build2 (MODIFY_EXPR, type,
10264 		  gpr, build_int_cst (type, n_gpr * 8));
10265       TREE_SIDE_EFFECTS (t) = 1;
10266       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10269   if (TARGET_SSE && cfun->va_list_fpr_size)
10271       type = TREE_TYPE (fpr);
10272       t = build2 (MODIFY_EXPR, type, fpr,
10273 		  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
10274       TREE_SIDE_EFFECTS (t) = 1;
10275       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10278   /* Find the overflow area.  */
10279   type = TREE_TYPE (ovf);
10280   if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
10281     ovf_rtx = crtl->args.internal_arg_pointer;
10283     ovf_rtx = cfun->machine->split_stack_varargs_pointer;
10284   t = make_tree (type, ovf_rtx);
10286     t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
10288   /* Store zero bounds for overflow area pointer.  */
10289   if (chkp_function_instrumented_p (current_function_decl))
10290     chkp_expand_bounds_reset_for_mem (ovf, t);
10292   t = build2 (MODIFY_EXPR, type, ovf, t);
10293   TREE_SIDE_EFFECTS (t) = 1;
10294   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10296   if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
10298       /* Find the register save area.
10299 	 Prologue of the function save it right above stack frame.  */
10300       type = TREE_TYPE (sav);
10301       t = make_tree (type, frame_pointer_rtx);
10302       if (!ix86_varargs_gpr_size)
10303 	t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
10305       /* Store zero bounds for save area pointer.  */
10306       if (chkp_function_instrumented_p (current_function_decl))
10307 	chkp_expand_bounds_reset_for_mem (sav, t);
10309       t = build2 (MODIFY_EXPR, type, sav, t);
10310       TREE_SIDE_EFFECTS (t) = 1;
10311       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10315 /* Implement va_arg.  */
10318 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
10319 		      gimple_seq *post_p)
10321   static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
10322   tree f_gpr, f_fpr, f_ovf, f_sav;
10323   tree gpr, fpr, ovf, sav, t;
10325   tree lab_false, lab_over = NULL_TREE;
10328   int indirect_p = 0;
10330   machine_mode nat_mode;
10331   unsigned int arg_boundary;
10333   /* Only 64bit target needs something special.  */
10334   if (is_va_list_char_pointer (TREE_TYPE (valist)))
10335     return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
10337   f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
10338   f_fpr = DECL_CHAIN (f_gpr);
10339   f_ovf = DECL_CHAIN (f_fpr);
10340   f_sav = DECL_CHAIN (f_ovf);
10342   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
10343 		valist, f_gpr, NULL_TREE);
10345   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
10346   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
10347   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Arguments passed by reference fetch a pointer instead of the value.  */
10349   indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
10351     type = build_pointer_type (type);
10352   size = int_size_in_bytes (type);
10353   rsize = CEIL (size, UNITS_PER_WORD);
10355   nat_mode = type_natural_mode (type, NULL, false);
10370   /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
10371   if (!TARGET_64BIT_MS_ABI)
10378     container = construct_container (nat_mode, TYPE_MODE (type),
10379 				     type, 0, X86_64_REGPARM_MAX,
10380 				     X86_64_SSE_REGPARM_MAX, intreg,
10385   /* Pull the value out of the saved registers.  */
10387   addr = create_tmp_var (ptr_type_node, "addr");
10391       int needed_intregs, needed_sseregs;
10393       tree int_addr, sse_addr;
10395       lab_false = create_artificial_label (UNKNOWN_LOCATION);
10396       lab_over = create_artificial_label (UNKNOWN_LOCATION);
10398       examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
/* A temporary is needed when register pieces are not laid out
   contiguously in the save area, or alignment exceeds what the save
   area provides.  */
10400       need_temp = (!REG_P (container)
10401 		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
10402 		       || TYPE_ALIGN (type) > 128));
10404       /* In case we are passing structure, verify that it is consecutive block
10405 	 on the register save area.  If not we need to do moves.  */
10406       if (!need_temp && !REG_P (container))
10408 	  /* Verify that all registers are strictly consecutive  */
10409 	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
10413 	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
10415 		  rtx slot = XVECEXP (container, 0, i);
10416 		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
10417 		      || INTVAL (XEXP (slot, 1)) != i * 16)
10425 	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
10427 		  rtx slot = XVECEXP (container, 0, i);
10428 		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
10429 		      || INTVAL (XEXP (slot, 1)) != i * 8)
10441 	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
10442 	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
10445       /* First ensure that we fit completely in registers.  */
10446       if (needed_intregs)
10448 	  t = build_int_cst (TREE_TYPE (gpr),
10449 			     (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
10450 	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
10451 	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
10452 	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
10453 	  gimplify_and_add (t, pre_p);
10455       if (needed_sseregs)
10457 	  t = build_int_cst (TREE_TYPE (fpr),
10458 			     (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
10459 			     + X86_64_REGPARM_MAX * 8);
10460 	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
10461 	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
10462 	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
10463 	  gimplify_and_add (t, pre_p);
10466       /* Compute index to start of area used for integer regs.  */
10467       if (needed_intregs)
10469 	  /* int_addr = gpr + sav; */
10470 	  t = fold_build_pointer_plus (sav, gpr);
10471 	  gimplify_assign (int_addr, t, pre_p);
10473       if (needed_sseregs)
10475 	  /* sse_addr = fpr + sav; */
10476 	  t = fold_build_pointer_plus (sav, fpr);
10477 	  gimplify_assign (sse_addr, t, pre_p);
/* Non-contiguous case: copy the register pieces into a temporary and
   read the value from there.  */
10481 	  int i, prev_size = 0;
10482 	  tree temp = create_tmp_var (type, "va_arg_tmp");
10484 	  /* addr = &temp; */
10485 	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
10486 	  gimplify_assign (addr, t, pre_p);
10488 	  for (i = 0; i < XVECLEN (container, 0); i++)
10490 	      rtx slot = XVECEXP (container, 0, i);
10491 	      rtx reg = XEXP (slot, 0);
10492 	      machine_mode mode = GET_MODE (reg);
10496 	      tree src_addr, src;
10498 	      tree dest_addr, dest;
10499 	      int cur_size = GET_MODE_SIZE (mode);
10501 	      gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
10502 	      prev_size = INTVAL (XEXP (slot, 1));
10503 	      if (prev_size + cur_size > size)
10505 		  cur_size = size - prev_size;
10506 		  mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
10507 		  if (mode == BLKmode)
10510 	      piece_type = lang_hooks.types.type_for_mode (mode, 1);
10511 	      if (mode == GET_MODE (reg))
10512 		addr_type = build_pointer_type (piece_type);
10514 		addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
10516 	      daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
10519 	      if (SSE_REGNO_P (REGNO (reg)))
10521 		  src_addr = sse_addr;
10522 		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
10526 		  src_addr = int_addr;
10527 		  src_offset = REGNO (reg) * 8;
10529 	      src_addr = fold_convert (addr_type, src_addr);
10530 	      src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
10532 	      dest_addr = fold_convert (daddr_type, addr);
10533 	      dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
10534 	      if (cur_size == GET_MODE_SIZE (mode))
10536 		  src = build_va_arg_indirect_ref (src_addr);
10537 		  dest = build_va_arg_indirect_ref (dest_addr);
10539 		  gimplify_assign (dest, src, pre_p);
/* Partial piece: fall back to a memcpy of CUR_SIZE bytes.  */
10544 		    = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
10545 				       3, dest_addr, src_addr,
10546 				       size_int (cur_size));
10547 		  gimplify_and_add (copy, pre_p);
10549 	      prev_size += cur_size;
/* Bump the register counters past what this argument consumed.  */
10553       if (needed_intregs)
10555 	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
10556 		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
10557 	  gimplify_assign (gpr, t, pre_p);
10560       if (needed_sseregs)
10562 	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
10563 		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
10564 	  gimplify_assign (unshare_expr (fpr), t, pre_p);
10567       gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
10569       gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
10572   /* ... otherwise out of the overflow area.  */
10574   /* When we align parameter on stack for caller, if the parameter
10575      alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
10576      aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
10577      here with caller.  */
10578   arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
10579   if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
10580     arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
10582   /* Care for on-stack alignment if needed.  */
10583   if (arg_boundary <= 64 || size == 0)
/* Round the overflow pointer up to the argument's alignment.  */
10587       HOST_WIDE_INT align = arg_boundary / 8;
10588       t = fold_build_pointer_plus_hwi (ovf, align - 1);
10589       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
10590 		  build_int_cst (TREE_TYPE (t), -align));
10593   gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
10594   gimplify_assign (addr, t, pre_p);
10596   t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
10597   gimplify_assign (unshare_expr (ovf), t, pre_p);
10600     gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
10602   ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
10603   addr = fold_convert (ptrtype, addr);
/* For pass-by-reference arguments, dereference once more to reach the
   actual value.  */
10606     addr = build_va_arg_indirect_ref (addr);
10607   return build_va_arg_indirect_ref (addr);
10610 /* Return true if OPNUM's MEM should be matched
10611    in movabs* patterns.  */
10614 ix86_check_movabs (rtx insn, int opnum)
10618   set = PATTERN (insn);
10619   if (GET_CODE (set) == PARALLEL)
10620     set = XVECEXP (set, 0, 0);
10621   gcc_assert (GET_CODE (set) == SET);
10622   mem = XEXP (set, opnum);
/* Look through subregs to the underlying MEM.  */
10623   while (SUBREG_P (mem))
10624     mem = SUBREG_REG (mem);
10625   gcc_assert (MEM_P (mem));
/* Reject volatile MEMs unless volatile operands are allowed.  */
10626   return volatile_ok || !MEM_VOLATILE_P (mem);
10629 /* Return false if INSN contains a MEM with a non-default address space.  */
10631 ix86_check_no_addr_space (rtx insn)
10633   subrtx_var_iterator::array_type array;
10634   FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
10637       if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
10643 /* Initialize the table of extra 80387 mathematical constants.  */
10646 init_ext_80387_constants (void)
10648   static const char * cst[5] =
10650     "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
10651     "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
10652     "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
10653     "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
10654     "3.1415926535897932385128089594061862044",  /* 4: fldpi  */
10658   for (i = 0; i < 5; i++)
10660       real_from_string (&ext_80387_constants_table[i], cst[i]);
10661       /* Ensure each constant is rounded to XFmode precision.  */
10662       real_convert (&ext_80387_constants_table[i],
10663 		    XFmode, &ext_80387_constants_table[i]);
/* Mark the table initialized so this runs only once.  */
10666   ext_80387_constants_init = 1;
10669 /* Return non-zero if the constant is something that
10670    can be loaded with a special instruction.  */
10673 standard_80387_constant_p (rtx x)
10675   machine_mode mode = GET_MODE (x);
10677   const REAL_VALUE_TYPE *r;
10679   if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
10682   if (x == CONST0_RTX (mode))
10684   if (x == CONST1_RTX (mode))
10687   r = CONST_DOUBLE_REAL_VALUE (x);
10689   /* For XFmode constants, try to find a special 80387 instruction when
10690      optimizing for size or on those CPUs that benefit from them.  */
10692       && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
10696       if (! ext_80387_constants_init)
10697 	init_ext_80387_constants ();
10699       for (i = 0; i < 5; i++)
10700 	if (real_identical (r, &ext_80387_constants_table[i]))
10704   /* Load of the constant -0.0 or -1.0 will be split as
10705      fldz;fchs or fld1;fchs sequence.  */
10706   if (real_isnegzero (r))
10708   if (real_identical (r, &dconstm1))
10714 /* Return the opcode of the special instruction to be used to load
/* Maps standard_80387_constant_p results to fld* mnemonics; any other
   value is a caller bug.  */
10718 standard_80387_constant_opcode (rtx x)
10720   switch (standard_80387_constant_p (x))
10740       gcc_unreachable ();
10744 /* Return the CONST_DOUBLE representing the 80387 constant that is
10745    loaded by the specified special instruction.  The argument IDX
10746    matches the return value from standard_80387_constant_p.  */
10749 standard_80387_constant_rtx (int idx)
10753   if (! ext_80387_constants_init)
10754     init_ext_80387_constants ();
10767       gcc_unreachable ();
10770   return const_double_from_real_value (ext_80387_constants_table[i],
10774 /* Return 1 if X is all 0s and 2 if x is all 1s
10775    in supported SSE/AVX vector mode.  */
10778 standard_sse_constant_p (rtx x)
10785   mode = GET_MODE (x);
10787   if (x == const0_rtx || x == CONST0_RTX (mode))
10789   if (vector_all_ones_operand (x, mode))
/* All-ones in wider modes needs AVX512F (vpternlog).  */
10808 	if (TARGET_AVX512F)
10817 /* Return the opcode of the special instruction to be used to load
/* Pick the xor/pcmpeq/vpternlog form matching INSN's mode attribute and
   the enabled ISA level.  Case 1 = all-zeros, case 2 = all-ones.  */
10821 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
10823   switch (standard_sse_constant_p (x))
10826       switch (get_attr_mode (insn))
10829 	  return "vpxord\t%g0, %g0, %g0";
10831 	  return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
10832 				 : "vpxord\t%g0, %g0, %g0";
10834 	  return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
10835 				 : "vpxorq\t%g0, %g0, %g0";
10837 	  return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
10838 				 : "%vpxor\t%0, %d0";
10840 	  return "%vxorpd\t%0, %d0";
10842 	  return "%vxorps\t%0, %d0";
10845 	  return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
10846 				 : "vpxor\t%x0, %x0, %x0";
10848 	  return "vxorpd\t%x0, %x0, %x0";
10850 	  return "vxorps\t%x0, %x0, %x0";
10857       if (TARGET_AVX512VL
10858 	  || get_attr_mode (insn) == MODE_XI
10859 	  || get_attr_mode (insn) == MODE_V8DF
10860 	  || get_attr_mode (insn) == MODE_V16SF)
10861 	return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
10863 	return "vpcmpeqd\t%0, %0, %0";
10865 	return "pcmpeqd\t%0, %0";
10870   gcc_unreachable ();
10873 /* Returns true if OP contains a symbol reference */
/* Recursive walk over OP's rtx format string, descending into 'e'
   (expression) and 'E' (vector) operands.  */
10876 symbolic_reference_mentioned_p (rtx op)
10881   if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
10884   fmt = GET_RTX_FORMAT (GET_CODE (op));
10885   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
10891 	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
10892 	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
10896       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
10903 /* Return true if it is appropriate to emit `ret' instructions in the
10904    body of a function.  Do this only if the epilogue is simple, needing a
10905    couple of insns.  Prior to reloading, we can't tell how many registers
10906    must be saved, so return false then.  Return false if there is no frame
10907    marker to de-allocate.  */
10910 ix86_can_use_return_insn_p (void)
10912   struct ix86_frame frame;
10914   if (! reload_completed || frame_pointer_needed)
10917   /* Don't allow more than 32k pop, since that's all we can do
10918      with one instruction.  */
10919   if (crtl->args.pops_args && crtl->args.size >= 32768)
/* A bare `ret' works only when nothing was pushed beyond the return
   address and no registers need restoring.  */
10922   ix86_compute_frame_layout (&frame);
10923   return (frame.stack_pointer_offset == UNITS_PER_WORD
10924 	  && (frame.nregs + frame.nsseregs) == 0);
10927 /* Value should be nonzero if functions must have frame pointers.
10928    Zero means the frame pointer need not be set up (and parms may
10929    be accessed via the stack pointer) in functions that seem suitable.  */
10932 ix86_frame_pointer_required (void)
10934   /* If we accessed previous frames, then the generated code expects
10935      to be able to access the saved ebp value in our frame.  */
10936   if (cfun->machine->accesses_prev_frame)
10939   /* Several x86 os'es need a frame pointer for other reasons,
10940      usually pertaining to setjmp.  */
10941   if (SUBTARGET_FRAME_POINTER_REQUIRED)
10944   /* For older 32-bit runtimes setjmp requires valid frame-pointer.  */
10945   if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
10948   /* Win64 SEH, very large frames need a frame-pointer as maximum stack
10949      allocation is 4GB.  */
10950   if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
10953   /* SSE saves require frame-pointer when stack is misaligned.  */
10954   if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
10957   /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
10958      turns off the frame pointer by default.  Turn it back on now if
10959      we've not got a leaf function.  */
10960   if (TARGET_OMIT_LEAF_FRAME_POINTER
10962 	  || ix86_current_function_calls_tls_descriptor))
/* Profiling without -mfentry needs a frame pointer for mcount.  */
10965   if (crtl->profile && !flag_fentry)
10971 /* Record that the current function accesses previous call frames.  */
10974 ix86_setup_frame_addresses (void)
10976   cfun->machine->accesses_prev_frame = 1;
10979 #ifndef USE_HIDDEN_LINKONCE
10980 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
10981 # define USE_HIDDEN_LINKONCE 1
10983 # define USE_HIDDEN_LINKONCE 0
10987 static int pic_labels_used;
10989 /* Fills in the label name that should be used for a pc thunk for
10990    the given register.  */
10993 get_pc_thunk_name (char name[32], unsigned int regno)
/* PC thunks are a 32-bit PIC mechanism only.  */
10995   gcc_assert (!TARGET_64BIT);
10997   if (USE_HIDDEN_LINKONCE)
10998     sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
11000     ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
11004 /* This function generates code for -fpic that loads %ebx with
11005 the return address of the caller and then returns. */
/* TARGET_ASM_CODE_END: emit one pc-thunk per register recorded in
   pic_labels_used.  Each thunk loads its register from the stack slot
   holding the return address and returns.  */
11008 ix86_code_end (void)
11013 for (regno = AX_REG; regno <= SP_REG; regno++)
11018 if (!(pic_labels_used & (1 << regno)))
11021 get_pc_thunk_name (name, regno);
/* Build a minimal FUNCTION_DECL so the thunk gets normal symbol
   handling (sections, visibility, debug suppression).  */
11023 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11024 get_identifier (name),
11025 build_function_type_list (void_type_node, NULL_TREE));
11026 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11027 NULL_TREE, void_type_node);
11028 TREE_PUBLIC (decl) = 1;
11029 TREE_STATIC (decl) = 1;
11030 DECL_IGNORED_P (decl) = 1;
/* Darwin path: weak definition in the coalesced text section.  */
11035 switch_to_section (darwin_sections[text_coal_section]);
11036 fputs ("\t.weak_definition\t", asm_out_file);
11037 assemble_name (asm_out_file, name);
11038 fputs ("\n\t.private_extern\t", asm_out_file);
11039 assemble_name (asm_out_file, name);
11040 putc ('\n', asm_out_file);
11041 ASM_OUTPUT_LABEL (asm_out_file, name);
11042 DECL_WEAK (decl) = 1;
/* ELF path: comdat group + hidden visibility so multiple TUs share
   a single copy of the thunk.  */
11046 if (USE_HIDDEN_LINKONCE)
11048 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
11050 targetm.asm_out.unique_section (decl, 0);
11051 switch_to_section (get_named_section (decl, NULL, 0));
11053 targetm.asm_out.globalize_label (asm_out_file, name);
11054 fputs ("\t.hidden\t", asm_out_file);
11055 assemble_name (asm_out_file, name);
11056 putc ('\n', asm_out_file);
11057 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
11061 switch_to_section (text_section);
11062 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Set up a dummy cfun so final_start_function can emit unwind info.  */
11065 DECL_INITIAL (decl) = make_node (BLOCK);
11066 current_function_decl = decl;
11067 allocate_struct_function (decl, false);
11068 init_function_start (decl);
11069 first_function_block_is_cold = false;
11070 /* Make sure unwind info is emitted for the thunk if needed. */
11071 final_start_function (emit_barrier (), asm_out_file, 1);
11073 /* Pad stack IP move with 4 instructions (two NOPs count
11074 as one instruction). */
11075 if (TARGET_PAD_SHORT_FUNCTION)
11080 fputs ("\tnop\n", asm_out_file);
/* The thunk body: mov (%esp), %reg ; ret.  */
11083 xops[0] = gen_rtx_REG (Pmode, regno);
11084 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11085 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
11086 output_asm_insn ("%!ret", NULL);
11087 final_end_function ();
11088 init_insn_lengths ();
11089 free_after_compilation (cfun);
11091 current_function_decl = NULL;
11094 if (flag_split_stack)
11095 file_end_indicate_split_stack ();
11098 /* Emit code for the SET_GOT patterns. */
/* Returns the assembler template that loads DEST with the GOT base.
   LABEL, when non-NULL, is the pc label to (re)use.  */
11101 output_set_got (rtx dest, rtx label)
11107 if (TARGET_VXWORKS_RTP && flag_pic)
11109 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
11110 xops[2] = gen_rtx_MEM (Pmode,
11111 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
11112 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
11114 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
11115 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
11116 an unadorned address. */
11117 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
11118 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
11119 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
11123 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
/* Preferred form: call the pc thunk for DEST's register and record
   that the thunk must be emitted at end of compilation.  */
11128 get_pc_thunk_name (name, REGNO (dest));
11129 pic_labels_used |= 1 << REGNO (dest);
11131 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
11132 xops[2] = gen_rtx_MEM (QImode, xops[2]);
11133 output_asm_insn ("%!call\t%X2", xops);
11136 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
11137 This is what will be referenced by the Mach-O PIC subsystem. */
11138 if (machopic_should_output_picbase_label () || !label)
11139 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
11141 /* When we are restoring the pic base at the site of a nonlocal label,
11142 and we decided to emit the pic base above, we will still output a
11143 local label used for calculating the correction offset (even though
11144 the offset will be 0 in that case). */
11146 targetm.asm_out.internal_label (asm_out_file, "L",
11147 CODE_LABEL_NUMBER (label));
11153 /* We don't need a pic base, we're not producing pic. */
11154 gcc_unreachable ();
/* Fallback form: call/pop style pc load via a local label.  */
11156 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
11157 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
11158 targetm.asm_out.internal_label (asm_out_file, "L",
11159 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
/* Finally add the GOT symbol offset to DEST.  */
11163 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
11168 /* Generate an "push" pattern for input ARG. */
/* Also updates the frame-state tracking (CFA/SP offsets) to account
   for the word pushed.  ARG is widened to word_mode if needed, since
   push always operates on full words.  */
11173 struct machine_function *m = cfun->machine;
11175 if (m->fs.cfa_reg == stack_pointer_rtx)
11176 m->fs.cfa_offset += UNITS_PER_WORD;
11177 m->fs.sp_offset += UNITS_PER_WORD;
11179 if (REG_P (arg) && GET_MODE (arg) != word_mode)
11180 arg = gen_rtx_REG (word_mode, REGNO (arg));
11182 return gen_rtx_SET (gen_rtx_MEM (word_mode,
11183 gen_rtx_PRE_DEC (Pmode,
11184 stack_pointer_rtx)),
11188 /* Generate an "pop" pattern for input ARG. */
/* Unlike gen_push, no frame-state bookkeeping is done here.  */
11193 if (REG_P (arg) && GET_MODE (arg) != word_mode)
11194 arg = gen_rtx_REG (word_mode, REGNO (arg));
11196 return gen_rtx_SET (arg,
11197 gen_rtx_MEM (word_mode,
11198 gen_rtx_POST_INC (Pmode,
11199 stack_pointer_rtx)));
11202 /* Return >= 0 if there is an unused call-clobbered register available
11203 for the entire function. */
/* Used to place the PIC base in a register that never needs saving.
   Only registers 0..2 (eax/ecx/edx) are considered, skipping the one
   used for DRAP.  Returns INVALID_REGNUM when none qualify.  */
11205 static unsigned int
11206 ix86_select_alt_pic_regnum (void)
11208 if (ix86_use_pseudo_pic_reg ())
11209 return INVALID_REGNUM;
11213 && !ix86_current_function_calls_tls_descriptor)
11216 /* Can't use the same register for both PIC and DRAP. */
11217 if (crtl->drap_reg)
11218 drap = REGNO (crtl->drap_reg);
11221 for (i = 2; i >= 0; --i)
11222 if (i != drap && !df_regs_ever_live_p (i))
11226 return INVALID_REGNUM;
11229 /* Return TRUE if we need to save REGNO. */
/* MAYBE_EH_RETURN additionally treats the EH return data registers as
   needing a save when the function calls __builtin_eh_return.  */
11232 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
/* The PIC base register is special-cased: with a pseudo PIC reg it is
   only saved for the 32-bit profiling _mcount call.  */
11234 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
11235 && pic_offset_table_rtx)
11237 if (ix86_use_pseudo_pic_reg ())
11239 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
11240 _mcount in prologue. */
11241 if (!TARGET_64BIT && flag_pic && crtl->profile)
11244 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11246 || crtl->calls_eh_return
11247 || crtl->uses_const_pool
11248 || cfun->has_nonlocal_label)
11249 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
11252 if (crtl->calls_eh_return && maybe_eh_return)
11257 unsigned test = EH_RETURN_DATA_REGNO (i);
11258 if (test == INVALID_REGNUM)
/* DRAP must survive unless its save/restore was optimized away.  */
11266 && regno == REGNO (crtl->drap_reg)
11267 && !cfun->machine->no_drap_save_restore)
/* Default rule: live, callee-saved, not fixed, and not the hard frame
   pointer when it is already maintained by the prologue.  */
11270 return (df_regs_ever_live_p (regno)
11271 && !call_used_regs[regno]
11272 && !fixed_regs[regno]
11273 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
11276 /* Return number of saved general purpose registers. */
11279 ix86_nsaved_regs (void)
11284 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11285 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true))
11290 /* Return number of saved SSE registers. */
/* Only the 64-bit MS ABI has callee-saved SSE registers.  */
11293 ix86_nsaved_sseregs (void)
11298 if (!TARGET_64BIT_MS_ABI)
11300 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11301 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
11306 /* Given FROM and TO register numbers, say whether this elimination is
11307 allowed. If stack alignment is needed, we can only replace argument
11308 pointer with hard frame pointer, or replace frame pointer with stack
11309 pointer. Otherwise, frame pointer elimination is automatically
11310 handled and all other eliminations are valid. */
11313 ix86_can_eliminate (const int from, const int to)
11315 if (stack_realign_fp)
11316 return ((from == ARG_POINTER_REGNUM
11317 && to == HARD_FRAME_POINTER_REGNUM)
11318 || (from == FRAME_POINTER_REGNUM
11319 && to == STACK_POINTER_REGNUM));
11321 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
11324 /* Return the offset between two registers, one to be eliminated, and the other
11325 its replacement, at the start of a routine. */
/* All offsets come from the freshly computed frame layout; TO must be
   either the hard frame pointer or the stack pointer.  */
11328 ix86_initial_elimination_offset (int from, int to)
11330 struct ix86_frame frame;
11331 ix86_compute_frame_layout (&frame);
11333 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
11334 return frame.hard_frame_pointer_offset;
11335 else if (from == FRAME_POINTER_REGNUM
11336 && to == HARD_FRAME_POINTER_REGNUM)
11337 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
11340 gcc_assert (to == STACK_POINTER_REGNUM);
11342 if (from == ARG_POINTER_REGNUM)
11343 return frame.stack_pointer_offset;
11345 gcc_assert (from == FRAME_POINTER_REGNUM);
11346 return frame.stack_pointer_offset - frame.frame_pointer_offset;
11350 /* In a dynamically-aligned function, we can't know the offset from
11351 stack pointer to frame pointer, so we must ensure that setjmp
11352 eliminates fp against the hard fp (%ebp) rather than trying to
11353 index from %esp up to the top of the frame across a gap that is
11354 of unknown (at compile-time) size. */
11356 ix86_builtin_setjmp_frame_value (void)
11358 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
11361 /* When using -fsplit-stack, the allocation routines set a field in
11362 the TCB to the bottom of the stack plus this much space, measured
11365 #define SPLIT_STACK_AVAILABLE 256
11367 /* Fill structure ix86_frame about frame of currently computed function. */
/* Computes all the offsets (register save areas, SSE save area,
   frame pointer, stack pointer, red zone) that the prologue/epilogue
   and elimination code rely on.  All offsets are measured from the
   CFA (the stack pointer at function entry plus the return address).  */
11370 ix86_compute_frame_layout (struct ix86_frame *frame)
11372 unsigned HOST_WIDE_INT stack_alignment_needed;
11373 HOST_WIDE_INT offset;
11374 unsigned HOST_WIDE_INT preferred_alignment;
11375 HOST_WIDE_INT size = get_frame_size ();
11376 HOST_WIDE_INT to_allocate;
11378 frame->nregs = ix86_nsaved_regs ();
11379 frame->nsseregs = ix86_nsaved_sseregs ();
11381 /* 64-bit MS ABI seem to require stack alignment to be always 16,
11382 except for function prologues, leaf functions and when the default
11383 incoming stack boundary is overridden at command line or via
11384 force_align_arg_pointer attribute. */
11385 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
11386 && (!crtl->is_leaf || cfun->calls_alloca != 0
11387 || ix86_current_function_calls_tls_descriptor
11388 || ix86_incoming_stack_boundary < 128))
11390 crtl->preferred_stack_boundary = 128;
11391 crtl->stack_alignment_needed = 128;
11394 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
11395 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
11397 gcc_assert (!size || stack_alignment_needed);
11398 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
11399 gcc_assert (preferred_alignment <= stack_alignment_needed);
11401 /* For SEH we have to limit the amount of code movement into the prologue.
11402 At present we do this via a BLOCKAGE, at which point there's very little
11403 scheduling that can be done, which means that there's very little point
11404 in doing anything except PUSHs. */
11406 cfun->machine->use_fast_prologue_epilogue = false;
11408 /* During reload iteration the amount of registers saved can change.
11409 Recompute the value as needed. Do not recompute when amount of registers
11410 didn't change as reload does multiple calls to the function and does not
11411 expect the decision to change within single iteration. */
11412 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
11413 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
11415 int count = frame->nregs;
11416 struct cgraph_node *node = cgraph_node::get (current_function_decl);
11418 cfun->machine->use_fast_prologue_epilogue_nregs = count;
11420 /* The fast prologue uses move instead of push to save registers. This
11421 is significantly longer, but also executes faster as modern hardware
11422 can execute the moves in parallel, but can't do that for push/pop.
11424 Be careful about choosing what prologue to emit: When function takes
11425 many instructions to execute we may use slow version as well as in
11426 case function is known to be outside hot spot (this is known with
11427 feedback only). Weight the size of function by number of registers
11428 to save as it is cheap to use one or two push instructions but very
11429 slow to use many of them. */
11431 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
11432 if (node->frequency < NODE_FREQUENCY_NORMAL
11433 || (flag_branch_probabilities
11434 && node->frequency < NODE_FREQUENCY_HOT))
11435 cfun->machine->use_fast_prologue_epilogue = false;
11437 cfun->machine->use_fast_prologue_epilogue
11438 = !expensive_function_p (count);
11441 frame->save_regs_using_mov
11442 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
11443 /* If static stack checking is enabled and done with probes,
11444 the registers need to be saved before allocating the frame. */
11445 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
11447 /* Skip return address. */
11448 offset = UNITS_PER_WORD;
11450 /* Skip pushed static chain. */
11451 if (ix86_static_chain_on_stack)
11452 offset += UNITS_PER_WORD;
11454 /* Skip saved base pointer. */
11455 if (frame_pointer_needed)
11456 offset += UNITS_PER_WORD;
11457 frame->hfp_save_offset = offset;
11459 /* The traditional frame pointer location is at the top of the frame. */
11460 frame->hard_frame_pointer_offset = offset;
11462 /* Register save area */
11463 offset += frame->nregs * UNITS_PER_WORD;
11464 frame->reg_save_offset = offset;
11466 /* On SEH target, registers are pushed just before the frame pointer
11469 frame->hard_frame_pointer_offset = offset;
11471 /* Align and set SSE register save area. */
11472 if (frame->nsseregs)
11474 /* The only ABI that has saved SSE registers (Win64) also has a
11475 16-byte aligned default stack, and thus we don't need to be
11476 within the re-aligned local stack frame to save them. In case
11477 incoming stack boundary is aligned to less than 16 bytes,
11478 unaligned move of SSE register will be emitted, so there is
11479 no point to round up the SSE register save area outside the
11480 re-aligned local stack frame to 16 bytes. */
11481 if (ix86_incoming_stack_boundary >= 128)
11482 offset = ROUND_UP (offset, 16);
11483 offset += frame->nsseregs * 16;
11485 frame->sse_reg_save_offset = offset;
11487 /* The re-aligned stack starts here. Values before this point are not
11488 directly comparable with values below this point. In order to make
11489 sure that no value happens to be the same before and after, force
11490 the alignment computation below to add a non-zero value. */
11491 if (stack_realign_fp)
11492 offset = ROUND_UP (offset, stack_alignment_needed);
11495 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
11496 offset += frame->va_arg_size;
11498 /* Align start of frame for local function. */
11499 if (stack_realign_fp
11500 || offset != frame->sse_reg_save_offset
11503 || cfun->calls_alloca
11504 || ix86_current_function_calls_tls_descriptor)
11505 offset = ROUND_UP (offset, stack_alignment_needed);
11507 /* Frame pointer points here. */
11508 frame->frame_pointer_offset = offset;
11512 /* Add outgoing arguments area. Can be skipped if we eliminated
11513 all the function calls as dead code.
11514 Skipping is however impossible when function calls alloca. Alloca
11515 expander assumes that last crtl->outgoing_args_size
11516 of stack frame are unused. */
11517 if (ACCUMULATE_OUTGOING_ARGS
11518 && (!crtl->is_leaf || cfun->calls_alloca
11519 || ix86_current_function_calls_tls_descriptor))
11521 offset += crtl->outgoing_args_size;
11522 frame->outgoing_arguments_size = crtl->outgoing_args_size;
11525 frame->outgoing_arguments_size = 0;
11527 /* Align stack boundary. Only needed if we're calling another function
11528 or using alloca. */
11529 if (!crtl->is_leaf || cfun->calls_alloca
11530 || ix86_current_function_calls_tls_descriptor)
11531 offset = ROUND_UP (offset, preferred_alignment);
11533 /* We've reached end of stack frame. */
11534 frame->stack_pointer_offset = offset;
11536 /* Size prologue needs to allocate. */
11537 to_allocate = offset - frame->sse_reg_save_offset;
/* Moves are only profitable for several registers; also the 64-bit
   displacement limit rules them out for huge frames.  */
11539 if ((!to_allocate && frame->nregs <= 1)
11540 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
11541 frame->save_regs_using_mov = false;
11543 if (ix86_using_red_zone ()
11544 && crtl->sp_is_unchanging
11546 && !ix86_pc_thunk_call_expanded
11547 && !ix86_current_function_calls_tls_descriptor)
11549 frame->red_zone_size = to_allocate;
11550 if (frame->save_regs_using_mov)
11551 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
11552 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
11553 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
11556 frame->red_zone_size = 0;
11557 frame->stack_pointer_offset -= frame->red_zone_size;
11559 /* The SEH frame pointer location is near the bottom of the frame.
11560 This is enforced by the fact that the difference between the
11561 stack pointer and the frame pointer is limited to 240 bytes in
11562 the unwind data structure. */
11565 HOST_WIDE_INT diff;
11567 /* If we can leave the frame pointer where it is, do so. Also, returns
11568 the establisher frame for __builtin_frame_address (0). */
11569 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
11570 if (diff <= SEH_MAX_FRAME_SIZE
11571 && (diff > 240 || (diff & 15) != 0)
11572 && !crtl->accesses_prior_frames)
11574 /* Ideally we'd determine what portion of the local stack frame
11575 (within the constraint of the lowest 240) is most heavily used.
11576 But without that complication, simply bias the frame pointer
11577 by 128 bytes so as to maximize the amount of the local stack
11578 frame that is addressable with 8-bit offsets. */
11579 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
11584 /* This is semi-inlined memory_address_length, but simplified
11585 since we know that we're always dealing with reg+offset, and
11586 to avoid having to create and discard all that rtl. */
/* Returns the extra encoding bytes (displacement + SIB) needed to
   address OFFSET(REGNO).  */
11589 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
11595 /* EBP and R13 cannot be encoded without an offset. */
11596 len = (regno == BP_REG || regno == R13_REG);
11598 else if (IN_RANGE (offset, -128, 127))
11601 /* ESP and R12 must be encoded with a SIB byte. */
11602 if (regno == SP_REG || regno == R12_REG)
11608 /* Return an RTX that points to CFA_OFFSET within the stack frame.
11609 The valid base registers are taken from CFUN->MACHINE->FS. */
11612 choose_baseaddr (HOST_WIDE_INT cfa_offset)
11614 const struct machine_function *m = cfun->machine;
11615 rtx base_reg = NULL;
11616 HOST_WIDE_INT base_offset = 0;
/* Fast path: prefer scheduling freedom over encoding size.  */
11618 if (m->use_fast_prologue_epilogue)
11620 /* Choose the base register most likely to allow the most scheduling
11621 opportunities. Generally FP is valid throughout the function,
11622 while DRAP must be reloaded within the epilogue. But choose either
11623 over the SP due to increased encoding size. */
11625 if (m->fs.fp_valid)
11627 base_reg = hard_frame_pointer_rtx;
11628 base_offset = m->fs.fp_offset - cfa_offset;
11630 else if (m->fs.drap_valid)
11632 base_reg = crtl->drap_reg;
11633 base_offset = 0 - cfa_offset;
11635 else if (m->fs.sp_valid)
11637 base_reg = stack_pointer_rtx;
11638 base_offset = m->fs.sp_offset - cfa_offset;
/* Size path: pick the base with the shortest address encoding.  */
11643 HOST_WIDE_INT toffset;
11644 int len = 16, tlen;
11646 /* Choose the base register with the smallest address encoding.
11647 With a tie, choose FP > DRAP > SP. */
11648 if (m->fs.sp_valid)
11650 base_reg = stack_pointer_rtx;
11651 base_offset = m->fs.sp_offset - cfa_offset;
11652 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
11654 if (m->fs.drap_valid)
11656 toffset = 0 - cfa_offset;
11657 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
11660 base_reg = crtl->drap_reg;
11661 base_offset = toffset;
11665 if (m->fs.fp_valid)
11667 toffset = m->fs.fp_offset - cfa_offset;
11668 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
11671 base_reg = hard_frame_pointer_rtx;
11672 base_offset = toffset;
11677 gcc_assert (base_reg != NULL);
11679 return plus_constant (Pmode, base_reg, base_offset);
11682 /* Emit code to save registers in the prologue. */
/* Pushes are emitted from the highest register number downward; each
   push is marked frame-related for the unwinder.  */
11685 ix86_emit_save_regs (void)
11687 unsigned int regno;
11690 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
11691 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true))
11693 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
11694 RTX_FRAME_RELATED_P (insn) = 1;
11698 /* Emit a single register save at CFA - CFA_OFFSET. */
/* Handles both general (word_mode) and SSE (V4SFmode) saves, adding
   the CFA notes the dwarf2 unwinder needs when the store's base
   register is not the current CFA register.  */
11701 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
11702 HOST_WIDE_INT cfa_offset)
11704 struct machine_function *m = cfun->machine;
11705 rtx reg = gen_rtx_REG (mode, regno);
11706 rtx unspec = NULL_RTX;
11707 rtx mem, addr, base, insn;
11708 unsigned int align;
11710 addr = choose_baseaddr (cfa_offset);
11711 mem = gen_frame_mem (mode, addr);
11713 /* The location is aligned up to INCOMING_STACK_BOUNDARY. */
11714 align = MIN (GET_MODE_ALIGNMENT (mode), INCOMING_STACK_BOUNDARY);
11715 set_mem_align (mem, align);
11717 /* SSE saves are not within re-aligned local stack frame.
11718 In case INCOMING_STACK_BOUNDARY is misaligned, we have
11719 to emit unaligned store. */
11720 if (mode == V4SFmode && align < 128)
11721 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (1, reg), UNSPEC_STOREU);
11723 insn = emit_insn (gen_rtx_SET (mem, unspec ? unspec : reg));
11724 RTX_FRAME_RELATED_P (insn) = 1;
11727 if (GET_CODE (base) == PLUS)
11728 base = XEXP (base, 0);
11729 gcc_checking_assert (REG_P (base));
11731 /* When saving registers into a re-aligned local stack frame, avoid
11732 any tricky guessing by dwarf2out. */
11733 if (m->fs.realigned)
11735 gcc_checking_assert (stack_realign_drap);
11737 if (regno == REGNO (crtl->drap_reg))
11739 /* A bit of a hack. We force the DRAP register to be saved in
11740 the re-aligned stack frame, which provides us with a copy
11741 of the CFA that will last past the prologue. Install it. */
11742 gcc_checking_assert (cfun->machine->fs.fp_valid);
11743 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
11744 cfun->machine->fs.fp_offset - cfa_offset);
11745 mem = gen_rtx_MEM (mode, addr);
11746 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
11750 /* The frame pointer is a stable reference within the
11751 aligned frame. Use it. */
11752 gcc_checking_assert (cfun->machine->fs.fp_valid);
11753 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
11754 cfun->machine->fs.fp_offset - cfa_offset);
11755 mem = gen_rtx_MEM (mode, addr);
11756 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
11760 /* The memory may not be relative to the current CFA register,
11761 which means that we may need to generate a new pattern for
11762 use by the unwind info. */
11763 else if (base != m->fs.cfa_reg)
11765 addr = plus_constant (Pmode, m->fs.cfa_reg,
11766 m->fs.cfa_offset - cfa_offset);
11767 mem = gen_rtx_MEM (mode, addr);
11768 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
11771 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
11774 /* Emit code to save registers using MOV insns.
11775 First register is stored at CFA - CFA_OFFSET. */
/* Saves ascend by register number while CFA_OFFSET descends one word
   per register.  */
11777 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
11779 unsigned int regno;
11781 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11782 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true))
11784 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
11785 cfa_offset -= UNITS_PER_WORD;
11789 /* Emit code to save SSE registers using MOV insns.
11790 First register is stored at CFA - CFA_OFFSET. */
/* Same scheme as ix86_emit_save_regs_using_mov, with 16-byte slots.  */
11792 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
11794 unsigned int regno;
11796 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11797 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
11799 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
11800 cfa_offset -= GET_MODE_SIZE (V4SFmode);
11804 static GTY(()) rtx queued_cfa_restores;
11806 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
11807 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
11808 Don't add the note if the previously saved value will be left untouched
11809 within stack red-zone till return, as unwinders can find the same value
11810 in the register and on the stack. */
11813 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
11815 if (!crtl->shrink_wrapped
11816 && cfa_offset <= cfun->machine->fs.red_zone_offset)
/* INSN given: attach the note directly; otherwise queue it for the
   next stack-manipulation insn (see ix86_add_queued_cfa_restore_notes).  */
11821 add_reg_note (insn, REG_CFA_RESTORE, reg);
11822 RTX_FRAME_RELATED_P (insn) = 1;
11825 queued_cfa_restores
11826 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
11829 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
/* Splices the queued note chain onto INSN's notes and clears the
   queue, marking INSN frame-related.  */
11832 ix86_add_queued_cfa_restore_notes (rtx insn)
11835 if (!queued_cfa_restores)
11837 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
11839 XEXP (last, 1) = REG_NOTES (insn);
11840 REG_NOTES (insn) = queued_cfa_restores;
11841 queued_cfa_restores = NULL_RTX;
11842 RTX_FRAME_RELATED_P (insn) = 1;
11845 /* Expand prologue or epilogue stack adjustment.
11846 The pattern exist to put a dependency on all ebp-based memory accesses.
11847 STYLE should be negative if instructions should be marked as frame related,
11848 zero if %r11 register is live and cannot be freely used and positive
/* SET_CFA indicates the adjustment moves the CFA register/offset; in
   that case a REG_CFA_ADJUST_CFA note is attached and the tracked
   frame state is updated.  */
11852 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
11853 int style, bool set_cfa)
11855 struct machine_function *m = cfun->machine;
11857 bool add_frame_related_expr = false;
11859 if (Pmode == SImode)
11860 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
11861 else if (x86_64_immediate_operand (offset, DImode))
11862 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
/* Offset doesn't fit a 32-bit immediate: materialize it in a scratch
   register first (%r11 when allowed, otherwise the hard frame pointer).  */
11866 /* r11 is used by indirect sibcall return as well, set before the
11867 epilogue and used after the epilogue. */
11869 tmp = gen_rtx_REG (DImode, R11_REG);
11872 gcc_assert (src != hard_frame_pointer_rtx
11873 && dest != hard_frame_pointer_rtx);
11874 tmp = hard_frame_pointer_rtx;
11876 insn = emit_insn (gen_rtx_SET (tmp, offset));
11878 add_frame_related_expr = true;
11880 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
11883 insn = emit_insn (insn);
11885 ix86_add_queued_cfa_restore_notes (insn);
11891 gcc_assert (m->fs.cfa_reg == src);
11892 m->fs.cfa_offset += INTVAL (offset);
11893 m->fs.cfa_reg = dest;
11895 r = gen_rtx_PLUS (Pmode, src, offset);
11896 r = gen_rtx_SET (dest, r);
11897 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
11898 RTX_FRAME_RELATED_P (insn) = 1;
11900 else if (style < 0)
11902 RTX_FRAME_RELATED_P (insn) = 1;
11903 if (add_frame_related_expr)
11905 rtx r = gen_rtx_PLUS (Pmode, src, offset);
11906 r = gen_rtx_SET (dest, r);
11907 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
/* Track the new stack-pointer state when SP is the destination.  */
11911 if (dest == stack_pointer_rtx)
11913 HOST_WIDE_INT ooffset = m->fs.sp_offset;
11914 bool valid = m->fs.sp_valid;
11916 if (src == hard_frame_pointer_rtx)
11918 valid = m->fs.fp_valid;
11919 ooffset = m->fs.fp_offset;
11921 else if (src == crtl->drap_reg)
11923 valid = m->fs.drap_valid;
11928 /* Else there are two possibilities: SP itself, which we set
11929 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
11930 taken care of this by hand along the eh_return path. */
11931 gcc_checking_assert (src == stack_pointer_rtx
11932 || offset == const0_rtx);
11935 m->fs.sp_offset = ooffset - INTVAL (offset);
11936 m->fs.sp_valid = valid;
11940 /* Find an available register to be used as dynamic realign argument
11941 pointer register. Such a register will be written in prologue and
11942 used in begin of body, so it must not be
11943 1. parameter passing register.
11945 We reuse static-chain register if it is available. Otherwise, we
11946 use DI for i386 and R13 for x86-64. We chose R13 since it has
11949 Return: the regno of chosen register. */
11951 static unsigned int
11952 find_drap_reg (void)
11954 tree decl = cfun->decl;
/* 64-bit case.  */
11958 /* Use R13 for nested function or function need static chain.
11959 Since function with tail call may use any caller-saved
11960 registers in epilogue, DRAP must not use caller-saved
11961 register in such case. */
11962 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
/* 32-bit case.  */
11969 /* Use DI for nested function or function need static chain.
11970 Since function with tail call may use any caller-saved
11971 registers in epilogue, DRAP must not use caller-saved
11972 register in such case. */
11973 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
11976 /* Reuse static chain register if it isn't used for parameter
11978 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
11980 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
11981 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
11988 /* Handle a "force_align_arg_pointer" attribute. */
/* Attribute handler: warn and drop the attribute when applied to
   anything other than a function/method type or type/field decl.  */
11991 ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name,
11992 tree, int, bool *no_add_attrs)
11994 if (TREE_CODE (*node) != FUNCTION_TYPE
11995 && TREE_CODE (*node) != METHOD_TYPE
11996 && TREE_CODE (*node) != FIELD_DECL
11997 && TREE_CODE (*node) != TYPE_DECL)
11999 warning (OPT_Wattributes, "%qE attribute only applies to functions",
12001 *no_add_attrs = true;
12007 /* Return minimum incoming stack alignment. */
/* SIBCALL distinguishes the sibcall-check use of this function; the
   branch consuming it is elided in this excerpt — TODO confirm.  */
12009 static unsigned int
12010 ix86_minimum_incoming_stack_boundary (bool sibcall)
12012 unsigned int incoming_stack_boundary;
12014 /* Prefer the one specified at command line. */
12015 if (ix86_user_incoming_stack_boundary)
12016 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
12017 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
12018 if -mstackrealign is used, it isn't used for sibcall check and
12019 estimated stack alignment is 128bit. */
12021 && ix86_force_align_arg_pointer
12022 && crtl->stack_alignment_estimated == 128)
12023 incoming_stack_boundary = MIN_STACK_BOUNDARY;
12025 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
12027 /* Incoming stack alignment can be changed on individual functions
12028 via force_align_arg_pointer attribute. We use the smallest
12029 incoming stack boundary. */
12030 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
12031 && lookup_attribute (ix86_force_align_arg_pointer_string,
12032 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
12033 incoming_stack_boundary = MIN_STACK_BOUNDARY;
12035 /* The incoming stack frame has to be aligned at least at
12036 parm_stack_boundary. */
12037 if (incoming_stack_boundary < crtl->parm_stack_boundary)
12038 incoming_stack_boundary = crtl->parm_stack_boundary;
12040 /* Stack at entrance of main is aligned by runtime. We use the
12041 smallest incoming stack boundary. */
12042 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
12043 && DECL_NAME (current_function_decl)
12044 && MAIN_NAME_P (DECL_NAME (current_function_decl))
12045 && DECL_FILE_SCOPE_P (current_function_decl))
12046 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
12048 return incoming_stack_boundary;
12051 /* Update incoming stack boundary and estimated stack alignment. */
/* TARGET_UPDATE_STACK_BOUNDARY hook.  */
12054 ix86_update_stack_boundary (void)
12056 ix86_incoming_stack_boundary
12057 = ix86_minimum_incoming_stack_boundary (false);
12059 /* x86_64 vararg needs 16byte stack alignment for register save
12063 && crtl->stack_alignment_estimated < 128)
12064 crtl->stack_alignment_estimated = 128;
12066 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
12067 if (ix86_tls_descriptor_calls_expanded_in_cfun
12068 && crtl->preferred_stack_boundary < 128)
12069 crtl->preferred_stack_boundary = 128;
12072 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
12073 needed or an rtx for DRAP otherwise. */
12076 ix86_get_drap_rtx (void)
/* With -mforce-drap or push-style argument passing, mark the function
   as needing DRAP so stack realignment uses it.  */
12078 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
12079 crtl->need_drap = true;
12081 if (stack_realign_drap)
12083 /* Assign DRAP to vDRAP and returns vDRAP */
12084 unsigned int regno = find_drap_reg ();
12087 rtx_insn *seq, *insn;
12089 arg_ptr = gen_rtx_REG (Pmode, regno);
12090 crtl->drap_reg = arg_ptr;
/* Copy the hard DRAP into a pseudo right after function entry and
   note it for the dwarf2 unwinder.  */
12093 drap_vreg = copy_to_reg (arg_ptr);
12094 seq = get_insns ();
12097 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
12100 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
12101 RTX_FRAME_RELATED_P (insn) = 1;
12109 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
12112 ix86_internal_arg_pointer (void)
/* The virtual incoming-args rtx is returned unconditionally here;
   any DRAP adjustment happens elsewhere (see ix86_get_drap_rtx).  */
12114 return virtual_incoming_args_rtx;
/* Bookkeeping for a scratch register handed out by
   get_scratch_register_on_entry; released by
   release_scratch_register_on_entry.  (Members not visible in this
   chunk — presumably the rtx and a saved/pushed flag; confirm.)  */
12117 struct scratch_reg {
12122 /* Return a short-lived scratch register for use on function entry.
12123 In 32-bit mode, it is valid only after the registers are saved
12124 in the prologue. This register must be released by means of
12125 release_scratch_register_on_entry once it is dead. */
12128 get_scratch_register_on_entry (struct scratch_reg *sr)
12136 /* We always use R11 in 64-bit mode. */
/* 32-bit mode: pick a register that is not used for incoming
   arguments, is not the static chain, and is not the DRAP register.  */
12141 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
12143 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12145 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12146 bool static_chain_p = DECL_STATIC_CHAIN (decl);
12147 int regparm = ix86_function_regparm (fntype, decl);
12149 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
12151 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
12152 for the static chain register. */
12153 if ((regparm < 1 || (fastcall_p && !static_chain_p))
12154 && drap_regno != AX_REG)
12156 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
12157 for the static chain register. */
12158 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
12160 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
12162 /* ecx is the static chain register. */
12163 else if (regparm < 3 && !fastcall_p && !thiscall_p
12165 && drap_regno != CX_REG)
/* No free caller-saved register: fall back to a callee-saved register
   that the prologue is already saving anyway.  */
12167 else if (ix86_save_reg (BX_REG, true))
12169 /* esi is the static chain register. */
12170 else if (!(regparm == 3 && static_chain_p)
12171 && ix86_save_reg (SI_REG, true))
12173 else if (ix86_save_reg (DI_REG, true))
/* Last resort: pick AX (or DX if AX is the DRAP register) and
   explicitly push it below so it can be restored on release.  */
12177 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
12182 sr->reg = gen_rtx_REG (Pmode, regno);
12185 rtx_insn *insn = emit_insn (gen_push (sr->reg));
12186 RTX_FRAME_RELATED_P (insn) = 1;
12190 /* Release a scratch register obtained from the preceding function. */
12193 release_scratch_register_on_entry (struct scratch_reg *sr)
12197 struct machine_function *m = cfun->machine;
/* Pop the saved value back into the scratch register.  */
12198 rtx x, insn = emit_insn (gen_pop (sr->reg));
12200 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
12201 RTX_FRAME_RELATED_P (insn) = 1;
/* So describe the stack-pointer adjustment explicitly for the
   unwinder via a REG_FRAME_RELATED_EXPR note: SP = SP + word.  */
12202 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
12203 x = gen_rtx_SET (stack_pointer_rtx, x);
12204 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
/* Keep the frame-state bookkeeping in sync with the pop.  */
12205 m->fs.sp_offset -= UNITS_PER_WORD;
12209 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
12211 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
12214 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
12216 /* We skip the probe for the first interval + a small dope of 4 words and
12217 probe that many bytes past the specified size to maintain a protection
12218 area at the bottom of the stack. */
12219 const int dope = 4 * UNITS_PER_WORD;
12220 rtx size_rtx = GEN_INT (size), last;
12222 /* See if we have a constant small number of probes to generate. If so,
12223 that's the easy case. The run-time loop is made up of 9 insns in the
12224 generic case while the compile-time loop is made up of 3+2*(n-1) insns
12225 for n # of intervals. */
/* Small constant sizes (at most 4 intervals): emit an unrolled
   adjust+probe sequence inline.  */
12226 if (size <= 4 * PROBE_INTERVAL)
12228 HOST_WIDE_INT i, adjust;
12229 bool first_probe = true;
12231 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
12232 values of N from 1 until it exceeds SIZE. If only one probe is
12233 needed, this will not generate any code. Then adjust and probe
12234 to PROBE_INTERVAL + SIZE. */
12235 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
/* The very first adjustment also covers the skipped interval + dope.  */
12239 adjust = 2 * PROBE_INTERVAL + dope;
12240 first_probe = false;
12243 adjust = PROBE_INTERVAL;
12245 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12246 plus_constant (Pmode, stack_pointer_rtx,
12248 emit_stack_probe (stack_pointer_rtx);
12252 adjust = size + PROBE_INTERVAL + dope;
12254 adjust = size + PROBE_INTERVAL - i;
12256 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12257 plus_constant (Pmode, stack_pointer_rtx,
12259 emit_stack_probe (stack_pointer_rtx);
12261 /* Adjust back to account for the additional first interval. */
12262 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
12263 plus_constant (Pmode, stack_pointer_rtx,
12264 PROBE_INTERVAL + dope)));
12267 /* Otherwise, do the same as above, but in a loop. Note that we must be
12268 extra careful with variables wrapping around because we might be at
12269 the very top (or the very bottom) of the address space and we have
12270 to be able to handle this case properly; in particular, we use an
12271 equality test for the loop condition. */
12274 HOST_WIDE_INT rounded_size;
12275 struct scratch_reg sr;
12277 get_scratch_register_on_entry (&sr);
12280 /* Step 1: round SIZE to the previous multiple of the interval. */
12282 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
12285 /* Step 2: compute initial and final value of the loop counter. */
12287 /* SP = SP_0 + PROBE_INTERVAL. */
12288 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12289 plus_constant (Pmode, stack_pointer_rtx,
12290 - (PROBE_INTERVAL + dope))));
12292 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
/* Sizes that fit a 32-bit immediate can be folded into the address
   computation; larger ones need an explicit move + add.  */
12293 if (rounded_size <= (HOST_WIDE_INT_1 << 31))
12294 emit_insn (gen_rtx_SET (sr.reg,
12295 plus_constant (Pmode, stack_pointer_rtx,
12299 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
12300 emit_insn (gen_rtx_SET (sr.reg,
12301 gen_rtx_PLUS (Pmode, sr.reg,
12302 stack_pointer_rtx)));
12306 /* Step 3: the loop
12310 SP = SP + PROBE_INTERVAL
12313 while (SP != LAST_ADDR)
12315 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
12316 values of N from 1 until it is equal to ROUNDED_SIZE. */
12318 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
12321 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
12322 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
12324 if (size != rounded_size)
12326 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12327 plus_constant (Pmode, stack_pointer_rtx,
12328 rounded_size - size)));
12329 emit_stack_probe (stack_pointer_rtx);
12332 /* Adjust back to account for the additional first interval. */
12333 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
12334 plus_constant (Pmode, stack_pointer_rtx,
12335 PROBE_INTERVAL + dope)));
12337 release_scratch_register_on_entry (&sr);
12340 /* Even if the stack pointer isn't the CFA register, we need to correctly
12341 describe the adjustments made to it, in particular differentiate the
12342 frame-related ones from the frame-unrelated ones. */
/* Attach a two-element SEQUENCE note: the frame-related net decrement
   by SIZE, and the frame-unrelated probe-area correction.  */
12345 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
12346 XVECEXP (expr, 0, 0)
12347 = gen_rtx_SET (stack_pointer_rtx,
12348 plus_constant (Pmode, stack_pointer_rtx, -size));
12349 XVECEXP (expr, 0, 1)
12350 = gen_rtx_SET (stack_pointer_rtx,
12351 plus_constant (Pmode, stack_pointer_rtx,
12352 PROBE_INTERVAL + dope + size));
12353 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
12354 RTX_FRAME_RELATED_P (last) = 1;
12356 cfun->machine->fs.sp_offset += size;
12359 /* Make sure nothing is scheduled before we are done. */
12360 emit_insn (gen_blockage ());
12363 /* Adjust the stack pointer up to REG while probing it. */
/* Emits the assembly text for the run-time adjust-and-probe loop:
   a label, SP -= PROBE_INTERVAL, a write-probe (or $0) at [SP],
   a compare against the last address, and a jne back to the label.  */
12366 output_adjust_stack_and_probe (rtx reg)
/* Per-translation-unit counter for unique internal label names.  */
12368 static int labelno = 0;
12372 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
/* Loop head.  */
12375 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
12377 /* SP = SP + PROBE_INTERVAL. */
12378 xops[0] = stack_pointer_rtx;
12379 xops[1] = GEN_INT (PROBE_INTERVAL);
12380 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
/* Probe by OR-ing zero into the word at the new SP.  */
12383 xops[1] = const0_rtx;
12384 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
12386 /* Test if SP == LAST_ADDR. */
12387 xops[0] = stack_pointer_rtx;
12389 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
/* Branch back while not equal (equality test — see the wrap-around
   rationale in ix86_adjust_stack_and_probe).  */
12392 fputs ("\tjne\t", asm_out_file);
12393 assemble_name_raw (asm_out_file, loop_lab);
12394 fputc ('\n', asm_out_file);
12399 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
12400 inclusive. These are offsets from the current stack pointer. */
12403 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
12405 /* See if we have a constant small number of probes to generate. If so,
12406 that's the easy case. The run-time loop is made up of 6 insns in the
12407 generic case while the compile-time loop is made up of n insns for n #
/* Small constant sizes (at most 6 intervals): emit unrolled probes,
   without moving the stack pointer.  */
12409 if (size <= 6 * PROBE_INTERVAL)
12413 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
12414 it exceeds SIZE. If only one probe is needed, this will not
12415 generate any code. Then probe at FIRST + SIZE. */
12416 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
12417 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
12420 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
12424 /* Otherwise, do the same as above, but in a loop. Note that we must be
12425 extra careful with variables wrapping around because we might be at
12426 the very top (or the very bottom) of the address space and we have
12427 to be able to handle this case properly; in particular, we use an
12428 equality test for the loop condition. */
12431 HOST_WIDE_INT rounded_size, last;
12432 struct scratch_reg sr;
12434 get_scratch_register_on_entry (&sr);
12437 /* Step 1: round SIZE to the previous multiple of the interval. */
12439 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
12442 /* Step 2: compute initial and final value of the loop counter. */
12444 /* TEST_OFFSET = FIRST. */
/* Offsets are tracked negated in the scratch register.  */
12445 emit_move_insn (sr.reg, GEN_INT (-first));
12447 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
12448 last = first + rounded_size;
12451 /* Step 3: the loop
12455 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
12458 while (TEST_ADDR != LAST_ADDR)
12460 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
12461 until it is equal to ROUNDED_SIZE. */
12463 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
12466 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
12467 that SIZE is equal to ROUNDED_SIZE. */
12469 if (size != rounded_size)
12470 emit_stack_probe (plus_constant (Pmode,
12471 gen_rtx_PLUS (Pmode,
12474 rounded_size - size));
12476 release_scratch_register_on_entry (&sr);
12479 /* Make sure nothing is scheduled before we are done. */
12480 emit_insn (gen_blockage ());
12483 /* Probe a range of stack addresses from REG to END, inclusive. These are
12484 offsets from the current stack pointer. */
/* Emits the assembly text for the probe loop generated by
   ix86_emit_probe_stack_range: advance the (negated) test offset,
   probe at SP + offset, loop until the end offset is reached.  */
12487 output_probe_stack_range (rtx reg, rtx end)
/* Per-translation-unit counter for unique internal label names.  */
12489 static int labelno = 0;
12493 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
/* Loop head.  */
12496 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
12498 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
12500 xops[1] = GEN_INT (PROBE_INTERVAL);
12501 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
12503 /* Probe at TEST_ADDR. */
/* OR zero into the word at SP + test-offset (base+index address).  */
12504 xops[0] = stack_pointer_rtx;
12506 xops[2] = const0_rtx;
12507 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
12509 /* Test if TEST_ADDR == LAST_ADDR. */
12512 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
12515 fputs ("\tjne\t", asm_out_file);
12516 assemble_name_raw (asm_out_file, loop_lab);
12517 fputc ('\n', asm_out_file);
12522 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
12523 to be generated in correct form. */
12525 ix86_finalize_stack_realign_flags (void)
12527 /* Check if stack realign is really needed after reload, and
12528 stores result in cfun */
/* The effective incoming boundary is the larger of the parameter-area
   boundary and the target's incoming stack boundary.  */
12529 unsigned int incoming_stack_boundary
12530 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
12531 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
/* Realignment is needed when the incoming boundary is smaller than
   what the function's stack slots actually require.  */
12532 unsigned int stack_realign
12533 = (incoming_stack_boundary
12534 < (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
12535 ? crtl->max_used_stack_slot_alignment
12536 : crtl->stack_alignment_needed));
/* Idempotence check: once finalized, the decision must not change.  */
12538 if (crtl->stack_realign_finalized)
12540 /* After stack_realign_needed is finalized, we can no longer
12542 gcc_assert (crtl->stack_realign_needed == stack_realign);
12546 /* If the only reason for frame_pointer_needed is that we conservatively
12547 assumed stack realignment might be needed, but in the end nothing that
12548 needed the stack alignment had been spilled, clear frame_pointer_needed
12549 and say we don't need stack realignment. */
12551 && frame_pointer_needed
12553 && flag_omit_frame_pointer
12554 && crtl->sp_is_unchanging
12555 && !ix86_current_function_calls_tls_descriptor
12556 && !crtl->accesses_prior_frames
12557 && !cfun->calls_alloca
12558 && !crtl->calls_eh_return
12559 /* See ira_setup_eliminable_regset for the rationale. */
12560 && !(STACK_CHECK_MOVING_SP
12561 && flag_stack_check
12563 && cfun->can_throw_non_call_exceptions)
12564 && !ix86_frame_pointer_required ()
12565 && get_frame_size () == 0
12566 && ix86_nsaved_sseregs () == 0
12567 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
12569 HARD_REG_SET set_up_by_prologue, prologue_used;
/* Scan every insn; if any one still requires a stack frame, keep the
   conservative realignment decision and finalize it as-is.  */
12572 CLEAR_HARD_REG_SET (prologue_used);
12573 CLEAR_HARD_REG_SET (set_up_by_prologue);
12574 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
12575 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
12576 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
12577 HARD_FRAME_POINTER_REGNUM);
12578 FOR_EACH_BB_FN (bb, cfun)
12581 FOR_BB_INSNS (bb, insn)
12582 if (NONDEBUG_INSN_P (insn)
12583 && requires_stack_frame_p (insn, prologue_used,
12584 set_up_by_prologue))
12586 crtl->stack_realign_needed = stack_realign;
12587 crtl->stack_realign_finalized = true;
12592 /* If drap has been set, but it actually isn't live at the start
12593 of the function, there is no reason to set it up. */
12594 if (crtl->drap_reg)
12596 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
12597 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
12599 crtl->drap_reg = NULL_RTX;
12600 crtl->need_drap = false;
12604 cfun->machine->no_drap_save_restore = true;
/* Nothing needed the alignment after all: drop the frame pointer and
   realignment, clamp all the recorded boundaries to the incoming one,
   and re-run dataflow so the liveness info matches.  */
12606 frame_pointer_needed = false;
12607 stack_realign = false;
12608 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
12609 crtl->stack_alignment_needed = incoming_stack_boundary;
12610 crtl->stack_alignment_estimated = incoming_stack_boundary;
12611 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
12612 crtl->preferred_stack_boundary = incoming_stack_boundary;
12613 df_finish_pass (true);
12614 df_scan_alloc (NULL);
12616 df_compute_regs_ever_live (true);
12620 crtl->stack_realign_needed = stack_realign;
12621 crtl->stack_realign_finalized = true;
12624 /* Delete SET_GOT right after entry block if it is allocated to reg. */
12627 ix86_elim_entry_set_got (rtx reg)
/* Look at the first real insn of the first block after the entry
   block, skipping notes and debug insns.  */
12629 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
12630 rtx_insn *c_insn = BB_HEAD (bb);
12631 if (!NONDEBUG_INSN_P (c_insn))
12632 c_insn = next_nonnote_nondebug_insn (c_insn);
12633 if (c_insn && NONJUMP_INSN_P (c_insn))
12635 rtx pat = PATTERN (c_insn);
/* The set_got pattern is a PARALLEL whose first element sets REG to
   an UNSPEC_SET_GOT; delete it if it targets the same hard register,
   since the caller has just emitted an equivalent SET_GOT.  */
12636 if (GET_CODE (pat) == PARALLEL)
12638 rtx vec = XVECEXP (pat, 0, 0);
12639 if (GET_CODE (vec) == SET
12640 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
12641 && REGNO (XEXP (vec, 0)) == REGNO (reg))
12642 delete_insn (c_insn);
12647 /* Expand the prologue into a bunch of separate insns. */
12650 ix86_expand_prologue (void)
12652 struct machine_function *m = cfun->machine;
12654 struct ix86_frame frame;
12655 HOST_WIDE_INT allocate;
12656 bool int_registers_saved;
12657 bool sse_registers_saved;
12658 rtx static_chain = NULL_RTX;
/* Pin down the realignment decision before laying out the frame.  */
12660 ix86_finalize_stack_realign_flags ();
12662 /* DRAP should not coexist with stack_realign_fp */
12663 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
12665 memset (&m->fs, 0, sizeof (m->fs));
12667 /* Initialize CFA state for before the prologue. */
12668 m->fs.cfa_reg = stack_pointer_rtx;
12669 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
12671 /* Track SP offset to the CFA. We continue tracking this after we've
12672 swapped the CFA register away from SP. In the case of re-alignment
12673 this is fudged; we're interested to offsets within the local frame. */
12674 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
12675 m->fs.sp_valid = true;
12677 ix86_compute_frame_layout (&frame);
/* Special entry sequences: ms_hook_prologue (32-bit only) ...  */
12679 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
12681 /* We should have already generated an error for any use of
12682 ms_hook on a nested function. */
12683 gcc_checking_assert (!ix86_static_chain_on_stack)
12685 /* Check if profiling is active and we shall use profiling before
12686 prologue variant. If so sorry. */
12687 if (crtl->profile && flag_fentry != 0)
12688 sorry ("ms_hook_prologue attribute isn%'t compatible "
12689 "with -mfentry for 32-bit");
12691 /* In ix86_asm_output_function_label we emitted:
12692 8b ff movl.s %edi,%edi
12694 8b ec movl.s %esp,%ebp
12696 This matches the hookable function prologue in Win32 API
12697 functions in Microsoft Windows XP Service Pack 2 and newer.
12698 Wine uses this to enable Windows apps to hook the Win32 API
12699 functions provided by Wine.
12701 What that means is that we've already set up the frame pointer. */
12703 if (frame_pointer_needed
12704 && !(crtl->drap_reg && crtl->stack_realign_needed)
12708 /* We've decided to use the frame pointer already set up.
12709 Describe this to the unwinder by pretending that both
12710 push and mov insns happen right here.
12712 Putting the unwind info here at the end of the ms_hook
12713 is done so that we can make absolutely certain we get
12714 the required byte sequence at the start of the function,
12715 rather than relying on an assembler that can produce
12716 the exact encoding required.
12718 However it does mean (in the unpatched case) that we have
12719 a 1 insn window where the asynchronous unwind info is
12720 incorrect. However, if we placed the unwind info at
12721 its correct location we would have incorrect unwind info
12722 in the patched case. Which is probably all moot since
12723 I don't expect Wine generates dwarf2 unwind info for the
12724 system libraries that use this feature. */
12726 insn = emit_insn (gen_blockage ());
12728 push = gen_push (hard_frame_pointer_rtx);
12729 mov = gen_rtx_SET (hard_frame_pointer_rtx,
12730 stack_pointer_rtx);
12731 RTX_FRAME_RELATED_P (push) = 1;
12732 RTX_FRAME_RELATED_P (mov) = 1;
12734 RTX_FRAME_RELATED_P (insn) = 1;
12735 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
12736 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
12738 /* Note that gen_push incremented m->fs.cfa_offset, even
12739 though we didn't emit the push insn here. */
12740 m->fs.cfa_reg = hard_frame_pointer_rtx;
12741 m->fs.fp_offset = m->fs.cfa_offset;
12742 m->fs.fp_valid = true;
12746 /* The frame pointer is not needed so pop %ebp again.
12747 This leaves us with a pristine state. */
12748 emit_insn (gen_pop (hard_frame_pointer_rtx));
12752 /* The first insn of a function that accepts its static chain on the
12753 stack is to push the register that would be filled in by a direct
12754 call. This insn will be skipped by the trampoline. */
12755 else if (ix86_static_chain_on_stack)
12757 static_chain = ix86_static_chain (cfun->decl, false);
12758 insn = emit_insn (gen_push (static_chain));
12759 emit_insn (gen_blockage ());
12761 /* We don't want to interpret this push insn as a register save,
12762 only as a stack adjustment. The real copy of the register as
12763 a save will be done later, if needed. */
12764 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
12765 t = gen_rtx_SET (stack_pointer_rtx, t);
12766 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
12767 RTX_FRAME_RELATED_P (insn) = 1;
12770 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
12771 of DRAP is needed and stack realignment is really needed after reload */
12772 if (stack_realign_drap)
12774 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
12776 /* Only need to push parameter pointer reg if it is caller saved. */
12777 if (!call_used_regs[REGNO (crtl->drap_reg)])
12779 /* Push arg pointer reg */
12780 insn = emit_insn (gen_push (crtl->drap_reg));
12781 RTX_FRAME_RELATED_P (insn) = 1;
12784 /* Grab the argument pointer. */
12785 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
12786 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
12787 RTX_FRAME_RELATED_P (insn) = 1;
/* From here on the CFA is expressed relative to the DRAP register.  */
12788 m->fs.cfa_reg = crtl->drap_reg;
12789 m->fs.cfa_offset = 0;
12791 /* Align the stack. */
12792 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
12794 GEN_INT (-align_bytes)));
12795 RTX_FRAME_RELATED_P (insn) = 1;
12797 /* Replicate the return address on the stack so that return
12798 address can be reached via (argp - 1) slot. This is needed
12799 to implement macro RETURN_ADDR_RTX and intrinsic function
12800 expand_builtin_return_addr etc. */
12801 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
12802 t = gen_frame_mem (word_mode, t);
12803 insn = emit_insn (gen_push (t));
12804 RTX_FRAME_RELATED_P (insn) = 1;
12806 /* For the purposes of frame and register save area addressing,
12807 we've started over with a new frame. */
12808 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
12809 m->fs.realigned = true;
12813 /* Replicate static chain on the stack so that static chain
12814 can be reached via (argp - 2) slot. This is needed for
12815 nested function with stack realignment. */
12816 insn = emit_insn (gen_push (static_chain));
12817 RTX_FRAME_RELATED_P (insn) = 1;
/* Register saves: may happen before or after frame allocation.  */
12821 int_registers_saved = (frame.nregs == 0);
12822 sse_registers_saved = (frame.nsseregs == 0);
12824 if (frame_pointer_needed && !m->fs.fp_valid)
12826 /* Note: AT&T enter does NOT have reversed args. Enter is probably
12827 slower on all targets. Also sdb doesn't like it. */
12828 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
12829 RTX_FRAME_RELATED_P (insn) = 1;
12831 /* Push registers now, before setting the frame pointer
12833 if (!int_registers_saved
12835 && !frame.save_regs_using_mov)
12837 ix86_emit_save_regs ();
12838 int_registers_saved = true;
12839 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
12842 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
12844 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
12845 RTX_FRAME_RELATED_P (insn) = 1;
12847 if (m->fs.cfa_reg == stack_pointer_rtx)
12848 m->fs.cfa_reg = hard_frame_pointer_rtx;
12849 m->fs.fp_offset = m->fs.sp_offset;
12850 m->fs.fp_valid = true;
12854 if (!int_registers_saved)
12856 /* If saving registers via PUSH, do so now. */
12857 if (!frame.save_regs_using_mov)
12859 ix86_emit_save_regs ();
12860 int_registers_saved = true;
12861 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
12864 /* When using red zone we may start register saving before allocating
12865 the stack frame saving one cycle of the prologue. However, avoid
12866 doing this if we have to probe the stack; at least on x86_64 the
12867 stack probe can turn into a call that clobbers a red zone location. */
12868 else if (ix86_using_red_zone ()
12869 && (! TARGET_STACK_PROBE
12870 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
12872 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
12873 int_registers_saved = true;
/* Frame-pointer-based realignment (no DRAP).  */
12877 if (stack_realign_fp)
12879 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
12880 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
12882 /* The computation of the size of the re-aligned stack frame means
12883 that we must allocate the size of the register save area before
12884 performing the actual alignment. Otherwise we cannot guarantee
12885 that there's enough storage above the realignment point. */
12886 if (m->fs.sp_offset != frame.sse_reg_save_offset)
12887 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12888 GEN_INT (m->fs.sp_offset
12889 - frame.sse_reg_save_offset),
12892 /* Align the stack. */
12893 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
12895 GEN_INT (-align_bytes)));
12897 /* For the purposes of register save area addressing, the stack
12898 pointer is no longer valid. As for the value of sp_offset,
12899 see ix86_compute_frame_layout, which we need to match in order
12900 to pass verification of stack_pointer_offset at the end. */
12901 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
12902 m->fs.sp_valid = false;
/* Main frame allocation.  */
12905 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
12907 if (flag_stack_usage_info)
12909 /* We start to count from ARG_POINTER. */
12910 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
12912 /* If it was realigned, take into account the fake frame. */
12913 if (stack_realign_drap)
12915 if (ix86_static_chain_on_stack)
12916 stack_size += UNITS_PER_WORD;
12918 if (!call_used_regs[REGNO (crtl->drap_reg)])
12919 stack_size += UNITS_PER_WORD;
12921 /* This over-estimates by 1 minimal-stack-alignment-unit but
12922 mitigates that by counting in the new return address slot. */
12923 current_function_dynamic_stack_size
12924 += crtl->stack_alignment_needed / BITS_PER_UNIT;
12927 current_function_static_stack_size = stack_size;
12930 /* On SEH target with very large frame size, allocate an area to save
12931 SSE registers (as the very large allocation won't be described). */
12933 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
12934 && !sse_registers_saved)
12936 HOST_WIDE_INT sse_size =
12937 frame.sse_reg_save_offset - frame.reg_save_offset;
12939 gcc_assert (int_registers_saved);
12941 /* No need to do stack checking as the area will be immediately
12943 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12944 GEN_INT (-sse_size), -1,
12945 m->fs.cfa_reg == stack_pointer_rtx);
12946 allocate -= sse_size;
12947 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
12948 sse_registers_saved = true;
12951 /* The stack has already been decremented by the instruction calling us
12952 so probe if the size is non-negative to preserve the protection area. */
12953 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
12955 /* We expect the registers to be saved when probes are used. */
12956 gcc_assert (int_registers_saved);
12958 if (STACK_CHECK_MOVING_SP)
12960 if (!(crtl->is_leaf && !cfun->calls_alloca
12961 && allocate <= PROBE_INTERVAL))
12963 ix86_adjust_stack_and_probe (allocate);
12969 HOST_WIDE_INT size = allocate;
/* Clamp the probed size on 64-bit: the probe insns use 32-bit
   displacements.  */
12971 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
12972 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
12974 if (TARGET_STACK_PROBE)
12976 if (crtl->is_leaf && !cfun->calls_alloca)
12978 if (size > PROBE_INTERVAL)
12979 ix86_emit_probe_stack_range (0, size);
12982 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
12986 if (crtl->is_leaf && !cfun->calls_alloca)
12988 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
12989 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
12990 size - STACK_CHECK_PROTECT);
12993 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
/* Small allocations: plain SP subtraction.  */
13000 else if (!ix86_target_stack_probe ()
13001 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
13003 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13004 GEN_INT (-allocate), -1,
13005 m->fs.cfa_reg == stack_pointer_rtx);
/* Large allocations on stack-probing targets: call the allocation
   worker (e.g. ___chkstk), preserving EAX/R10 if they are live.  */
13009 rtx eax = gen_rtx_REG (Pmode, AX_REG);
13011 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
13012 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
13013 bool eax_live = ix86_eax_live_at_start_p ();
13014 bool r10_live = false;
13017 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
13021 insn = emit_insn (gen_push (eax));
13022 allocate -= UNITS_PER_WORD;
13023 /* Note that SEH directives need to continue tracking the stack
13024 pointer even after the frame pointer has been set up. */
13025 if (sp_is_cfa_reg || TARGET_SEH)
13028 m->fs.cfa_offset += UNITS_PER_WORD;
13029 RTX_FRAME_RELATED_P (insn) = 1;
13030 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13031 gen_rtx_SET (stack_pointer_rtx,
13032 plus_constant (Pmode, stack_pointer_rtx,
13033 -UNITS_PER_WORD)));
13039 r10 = gen_rtx_REG (Pmode, R10_REG);
13040 insn = emit_insn (gen_push (r10));
13041 allocate -= UNITS_PER_WORD;
13042 if (sp_is_cfa_reg || TARGET_SEH)
13045 m->fs.cfa_offset += UNITS_PER_WORD;
13046 RTX_FRAME_RELATED_P (insn) = 1;
13047 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13048 gen_rtx_SET (stack_pointer_rtx,
13049 plus_constant (Pmode, stack_pointer_rtx,
13050 -UNITS_PER_WORD)));
13054 emit_move_insn (eax, GEN_INT (allocate));
13055 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
13057 /* Use the fact that AX still contains ALLOCATE. */
13058 adjust_stack_insn = (Pmode == DImode
13059 ? gen_pro_epilogue_adjust_stack_di_sub
13060 : gen_pro_epilogue_adjust_stack_si_sub);
13062 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
13063 stack_pointer_rtx, eax));
13065 if (sp_is_cfa_reg || TARGET_SEH)
13068 m->fs.cfa_offset += allocate;
13069 RTX_FRAME_RELATED_P (insn) = 1;
13070 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13071 gen_rtx_SET (stack_pointer_rtx,
13072 plus_constant (Pmode, stack_pointer_rtx,
13075 m->fs.sp_offset += allocate;
13077 /* Use stack_pointer_rtx for relative addressing so that code
13078 works for realigned stack, too. */
13079 if (r10_live && eax_live)
13081 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
13082 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
13083 gen_frame_mem (word_mode, t));
13084 t = plus_constant (Pmode, t, UNITS_PER_WORD);
13085 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
13086 gen_frame_mem (word_mode, t));
13088 else if (eax_live || r10_live)
13090 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
13091 emit_move_insn (gen_rtx_REG (word_mode,
13092 (eax_live ? AX_REG : R10_REG)),
13093 gen_frame_mem (word_mode, t));
13096 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
13098 /* If we haven't already set up the frame pointer, do so now. */
13099 if (frame_pointer_needed && !m->fs.fp_valid)
13101 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
13102 GEN_INT (frame.stack_pointer_offset
13103 - frame.hard_frame_pointer_offset));
13104 insn = emit_insn (insn);
13105 RTX_FRAME_RELATED_P (insn) = 1;
13106 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
13108 if (m->fs.cfa_reg == stack_pointer_rtx)
13109 m->fs.cfa_reg = hard_frame_pointer_rtx;
13110 m->fs.fp_offset = frame.hard_frame_pointer_offset;
13111 m->fs.fp_valid = true;
/* Deferred register saves (the save_regs_using_mov path).  */
13114 if (!int_registers_saved)
13115 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
13116 if (!sse_registers_saved)
13117 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
13119 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
13121 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
13123 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
13124 insn = emit_insn (gen_set_got (pic));
13125 RTX_FRAME_RELATED_P (insn) = 1;
13126 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
13127 emit_insn (gen_prologue_use (pic));
13128 /* Deleting already emitted SET_GOT if exist and allocated to
13129 REAL_PIC_OFFSET_TABLE_REGNUM. */
13130 ix86_elim_entry_set_got (pic);
13133 if (crtl->drap_reg && !crtl->stack_realign_needed)
13135 /* vDRAP is setup but after reload it turns out stack realign
13136 isn't necessary, here we will emit prologue to setup DRAP
13137 without stack realign adjustment */
13138 t = choose_baseaddr (0);
13139 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
13142 /* Prevent instructions from being scheduled into register save push
13143 sequence when access to the redzone area is done through frame pointer.
13144 The offset between the frame pointer and the stack pointer is calculated
13145 relative to the value of the stack pointer at the end of the function
13146 prologue, and moving instructions that access redzone area via frame
13147 pointer inside push sequence violates this assumption. */
13148 if (frame_pointer_needed && frame.red_zone_size)
13149 emit_insn (gen_memory_blockage ());
13151 /* Emit cld instruction if stringops are used in the function. */
13152 if (TARGET_CLD && ix86_current_function_needs_cld)
13153 emit_insn (gen_cld ());
13155 /* SEH requires that the prologue end within 256 bytes of the start of
13156 the function. Prevent instruction schedules that would extend that.
13157 Further, prevent alloca modifications to the stack pointer from being
13158 combined with prologue modifications. */
13160 emit_insn (gen_prologue_use (stack_pointer_rtx));
13163 /* Emit code to restore REG using a POP insn.  */
13166 ix86_emit_restore_reg_using_pop (rtx reg)
13168 struct machine_function *m = cfun->machine;
  /* Emit the pop itself and queue a CFA-restore note for REG at the
     current tracked stack-pointer offset; the pop shrinks the tracked
     frame by one word.  */
13169 rtx_insn *insn = emit_insn (gen_pop (reg));
13171 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
13172 m->fs.sp_offset -= UNITS_PER_WORD;
13174 if (m->fs.cfa_reg == crtl->drap_reg
13175 && REGNO (reg) == REGNO (crtl->drap_reg))
13177 /* Previously we'd represented the CFA as an expression
13178 like *(%ebp - 8).  We've just popped that value from
13179 the stack, which means we need to reset the CFA to
13180 the drap register.  This will remain until we restore
13181 the stack pointer.  */
13182 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
13183 RTX_FRAME_RELATED_P (insn) = 1;
13185 /* This means that the DRAP register is valid for addressing too.  */
13186 m->fs.drap_valid = true;
  /* While the stack pointer is still the CFA register, each pop moves
     the CFA by one word; record that with a REG_CFA_ADJUST_CFA note.  */
13190 if (m->fs.cfa_reg == stack_pointer_rtx)
13192 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
13193 x = gen_rtx_SET (stack_pointer_rtx, x);
13194 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
13195 RTX_FRAME_RELATED_P (insn) = 1;
13197 m->fs.cfa_offset -= UNITS_PER_WORD;
13200 /* When the frame pointer is the CFA, and we pop it, we are
13201 swapping back to the stack pointer as the CFA.  This happens
13202 for stack frames that don't allocate other data, so we assume
13203 the stack pointer is now pointing at the return address, i.e.
13204 the function entry state, which makes the offset be 1 word.  */
13205 if (reg == hard_frame_pointer_rtx)
13207 m->fs.fp_valid = false;
13208 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
13210 m->fs.cfa_reg = stack_pointer_rtx;
13211 m->fs.cfa_offset -= UNITS_PER_WORD;
13213 add_reg_note (insn, REG_CFA_DEF_CFA,
13214 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13215 GEN_INT (m->fs.cfa_offset)));
13216 RTX_FRAME_RELATED_P (insn) = 1;
13221 /* Emit code to restore saved registers using POP insns.  */
13224 ix86_emit_restore_regs_using_pop (void)
13226 unsigned int regno;
  /* Scan hard registers in ascending order and pop every general
     register that the prologue saved (maybe_eh_return passed as
     false, i.e. the normal, non-eh_return register set).  */
13228 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13229 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false))
13230 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
13233 /* Emit code and notes for the LEAVE instruction.  */
13236 ix86_emit_leave (void)
13238 struct machine_function *m = cfun->machine;
13239 rtx_insn *insn = emit_insn (ix86_gen_leave ());
  /* Attach any CFA-restore notes queued by earlier restores to the
     LEAVE insn.  */
13241 ix86_add_queued_cfa_restore_notes (insn);
  /* LEAVE restores the stack pointer from the frame pointer and pops
     the saved frame pointer: afterwards SP is valid at FP's old offset
     minus one word, and the frame pointer is no longer valid.  */
13243 gcc_assert (m->fs.fp_valid);
13244 m->fs.sp_valid = true;
13245 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
13246 m->fs.fp_valid = false;
  /* If the frame pointer was the CFA register, switch the CFA back to
     the stack pointer and emit the corresponding unwind notes.  */
13248 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
13250 m->fs.cfa_reg = stack_pointer_rtx;
13251 m->fs.cfa_offset = m->fs.sp_offset;
13253 add_reg_note (insn, REG_CFA_DEF_CFA,
13254 plus_constant (Pmode, stack_pointer_rtx,
13256 RTX_FRAME_RELATED_P (insn) = 1;
13258 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
13262 /* Emit code to restore saved registers using MOV insns.
13263 First register is restored from CFA - CFA_OFFSET.  */
13265 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
13266 bool maybe_eh_return)
13268 struct machine_function *m = cfun->machine;
13269 unsigned int regno;
13271 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13272 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
13274 rtx reg = gen_rtx_REG (word_mode, regno);
  /* Address the save slot via the best currently-valid base register
     (choose_baseaddr) and reload the register from the frame.  */
13278 mem = choose_baseaddr (cfa_offset);
13279 mem = gen_frame_mem (word_mode, mem);
13280 insn = emit_move_insn (reg, mem);
13282 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
13284 /* Previously we'd represented the CFA as an expression
13285 like *(%ebp - 8).  We've just popped that value from
13286 the stack, which means we need to reset the CFA to
13287 the drap register.  This will remain until we restore
13288 the stack pointer.  */
13289 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
13290 RTX_FRAME_RELATED_P (insn) = 1;
13292 /* This means that the DRAP register is valid for addressing.  */
13293 m->fs.drap_valid = true;
  /* Queue a CFA-restore note (attached later) and step down to the
     next save slot, one word lower.  */
13296 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
13298 cfa_offset -= UNITS_PER_WORD;
13302 /* Emit code to restore saved SSE registers using MOV insns.
13303 First register is restored from CFA - CFA_OFFSET.  */
13305 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
13306 bool maybe_eh_return)
13308 unsigned int regno;
13310 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13311 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
13313 rtx reg = gen_rtx_REG (V4SFmode, regno);
13315 unsigned int align;
13317 mem = choose_baseaddr (cfa_offset);
13318 mem = gen_rtx_MEM (V4SFmode, mem);
13320 /* The location is aligned up to INCOMING_STACK_BOUNDARY.  */
13321 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), INCOMING_STACK_BOUNDARY);
13322 set_mem_align (mem, align);
13324 /* SSE saves are not within re-aligned local stack frame.
13325 In case INCOMING_STACK_BOUNDARY is misaligned, we have
13326 to emit unaligned load.  */
  /* Unaligned path: wrap the memory in an UNSPEC so the load is
     emitted as an unaligned vector move.  */
13329 rtx unspec = gen_rtx_UNSPEC (V4SFmode, gen_rtvec (1, mem),
13331 emit_insn (gen_rtx_SET (reg, unspec));
13334 emit_insn (gen_rtx_SET (reg, mem));
  /* Queue the CFA-restore note and advance by one 16-byte vector
     slot.  */
13336 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
13338 cfa_offset -= GET_MODE_SIZE (V4SFmode);
13342 /* Restore function stack, frame, and registers.  */
  /* NOTE(review): STYLE appears to select the epilogue flavor; the
     tests below use style == 2 for eh_return epilogues (see the
     maybe_eh_return argument) — confirm the remaining values (sibcall
     vs. normal return) against the callers.  */
13345 ix86_expand_epilogue (int style)
13347 struct machine_function *m = cfun->machine;
  /* Save the frame-state machine so it can be reset for a subsequent
     epilogue expansion (restored at the end of this function).  */
13348 struct machine_frame_state frame_state_save = m->fs;
13349 struct ix86_frame frame;
13350 bool restore_regs_via_mov;
13353 ix86_finalize_stack_realign_flags ();
13354 ix86_compute_frame_layout (&frame);
  /* Sanity-check the tracked frame state against the computed frame
     layout before emitting anything.  */
13356 m->fs.sp_valid = (!frame_pointer_needed
13357 || (crtl->sp_is_unchanging
13358 && !stack_realign_fp));
13359 gcc_assert (!m->fs.sp_valid
13360 || m->fs.sp_offset == frame.stack_pointer_offset);
13362 /* The FP must be valid if the frame pointer is present.  */
13363 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
13364 gcc_assert (!m->fs.fp_valid
13365 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
13367 /* We must have *some* valid pointer to the stack frame.  */
13368 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
13370 /* The DRAP is never valid at this point.  */
13371 gcc_assert (!m->fs.drap_valid);
13373 /* See the comment about red zone and frame
13374 pointer usage in ix86_expand_prologue.  */
13375 if (frame_pointer_needed && frame.red_zone_size)
13376 emit_insn (gen_memory_blockage ());
13378 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
13379 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
13381 /* Determine the CFA offset of the end of the red-zone.  */
13382 m->fs.red_zone_offset = 0;
13383 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
13385 /* The red-zone begins below the return address.  */
13386 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
13388 /* When the register save area is in the aligned portion of
13389 the stack, determine the maximum runtime displacement that
13390 matches up with the aligned frame.  */
13391 if (stack_realign_drap)
13392 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
13396 /* Special care must be taken for the normal return case of a function
13397 using eh_return: the eax and edx registers are marked as saved, but
13398 not restored along this path.  Adjust the save location to match.  */
13399 if (crtl->calls_eh_return && style != 2)
13400 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
  /* Decide whether the saved general registers are restored with MOV
     loads or with POP insns.  */
13402 /* EH_RETURN requires the use of moves to function properly.  */
13403 if (crtl->calls_eh_return)
13404 restore_regs_via_mov = true;
13405 /* SEH requires the use of pops to identify the epilogue.  */
13406 else if (TARGET_SEH)
13407 restore_regs_via_mov = false;
13408 /* If we're only restoring one register and sp is not valid then
13409 using a move instruction to restore the register since it's
13410 less work than reloading sp and popping the register.  */
13411 else if (!m->fs.sp_valid && frame.nregs <= 1)
13412 restore_regs_via_mov = true;
13413 else if (TARGET_EPILOGUE_USING_MOVE
13414 && cfun->machine->use_fast_prologue_epilogue
13415 && (frame.nregs > 1
13416 || m->fs.sp_offset != frame.reg_save_offset))
13417 restore_regs_via_mov = true;
13418 else if (frame_pointer_needed
13420 && m->fs.sp_offset != frame.reg_save_offset)
13421 restore_regs_via_mov = true;
13422 else if (frame_pointer_needed
13423 && TARGET_USE_LEAVE
13424 && cfun->machine->use_fast_prologue_epilogue
13425 && frame.nregs == 1)
13426 restore_regs_via_mov = true;
13428 restore_regs_via_mov = false;
13430 if (restore_regs_via_mov || frame.nsseregs)
13432 /* Ensure that the entire register save area is addressable via
13433 the stack pointer, if we will restore via sp.  */
13435 && m->fs.sp_offset > 0x7fffffff
13436 && !(m->fs.fp_valid || m->fs.drap_valid)
13437 && (frame.nsseregs + frame.nregs) != 0)
13439 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13440 GEN_INT (m->fs.sp_offset
13441 - frame.sse_reg_save_offset),
13443 m->fs.cfa_reg == stack_pointer_rtx);
13447 /* If there are any SSE registers to restore, then we have to do it
13448 via moves, since there's obviously no pop for SSE regs.  */
13449 if (frame.nsseregs)
13450 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
13453 if (restore_regs_via_mov)
13458 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
13460 /* eh_return epilogues need %ecx added to the stack pointer.  */
13463 rtx sa = EH_RETURN_STACKADJ_RTX;
13466 /* %ecx can't be used for both DRAP register and eh_return.  */
13467 if (crtl->drap_reg)
13468 gcc_assert (REGNO (crtl->drap_reg) != CX_REG);
13470 /* regparm nested functions don't work with eh_return.  */
13471 gcc_assert (!ix86_static_chain_on_stack);
13473 if (frame_pointer_needed)
  /* Frame-pointer case: compute the adjusted stack address into SA,
     then reload the saved frame pointer from the frame.  */
13475 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
13476 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
13477 emit_insn (gen_rtx_SET (sa, t));
13479 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
13480 insn = emit_move_insn (hard_frame_pointer_rtx, t);
13482 /* Note that we use SA as a temporary CFA, as the return
13483 address is at the proper place relative to it.  We
13484 pretend this happens at the FP restore insn because
13485 prior to this insn the FP would be stored at the wrong
13486 offset relative to SA, and after this insn we have no
13487 other reasonable register to use for the CFA.  We don't
13488 bother resetting the CFA to the SP for the duration of
13489 the return insn.  */
13490 add_reg_note (insn, REG_CFA_DEF_CFA,
13491 plus_constant (Pmode, sa, UNITS_PER_WORD))
13492 ix86_add_queued_cfa_restore_notes (insn);
13493 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
13494 RTX_FRAME_RELATED_P (insn) = 1;
13496 m->fs.cfa_reg = sa;
13497 m->fs.cfa_offset = UNITS_PER_WORD;
13498 m->fs.fp_valid = false;
13500 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
13501 const0_rtx, style, false);
  /* No frame pointer: add SA directly into the stack pointer.  */
13505 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
13506 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
13507 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
13508 ix86_add_queued_cfa_restore_notes (insn);
13510 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
13511 if (m->fs.cfa_offset != UNITS_PER_WORD)
13513 m->fs.cfa_offset = UNITS_PER_WORD;
13514 add_reg_note (insn, REG_CFA_DEF_CFA,
13515 plus_constant (Pmode, stack_pointer_rtx,
13517 RTX_FRAME_RELATED_P (insn) = 1;
13520 m->fs.sp_offset = UNITS_PER_WORD;
13521 m->fs.sp_valid = true;
13526 /* SEH requires that the function end with (1) a stack adjustment
13527 if necessary, (2) a sequence of pops, and (3) a return or
13528 jump instruction.  Prevent insns from the function body from
13529 being scheduled into this sequence.  */
13532 /* Prevent a catch region from being adjacent to the standard
13533 epilogue sequence.  Unfortunately crtl->uses_eh_lsda nor
13534 several other flags that would be interesting to test are
13536 if (flag_non_call_exceptions)
13537 emit_insn (gen_nops (const1_rtx));
13539 emit_insn (gen_blockage ());
13542 /* First step is to deallocate the stack frame so that we can
13543 pop the registers.  Also do it on SEH target for very large
13544 frame as the emitted instructions aren't allowed by the ABI in
13546 if (!m->fs.sp_valid
13548 && (m->fs.sp_offset - frame.reg_save_offset
13549 >= SEH_MAX_FRAME_SIZE)))
13551 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
13552 GEN_INT (m->fs.fp_offset
13553 - frame.reg_save_offset),
13556 else if (m->fs.sp_offset != frame.reg_save_offset)
13558 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13559 GEN_INT (m->fs.sp_offset
13560 - frame.reg_save_offset),
13562 m->fs.cfa_reg == stack_pointer_rtx);
13565 ix86_emit_restore_regs_using_pop ();
13568 /* If we used a stack pointer and haven't already got rid of it,
13570 if (m->fs.fp_valid)
13572 /* If the stack pointer is valid and pointing at the frame
13573 pointer store address, then we only need a pop.  */
13574 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
13575 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
13576 /* Leave results in shorter dependency chains on CPUs that are
13577 able to grok it fast.  */
13578 else if (TARGET_USE_LEAVE
13579 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
13580 || !cfun->machine->use_fast_prologue_epilogue)
13581 ix86_emit_leave ();
13584 pro_epilogue_adjust_stack (stack_pointer_rtx,
13585 hard_frame_pointer_rtx,
13586 const0_rtx, style, !using_drap);
13587 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
  /* DRAP epilogue: recover the stack pointer from the DRAP register,
     accounting for the pushed static chain and/or saved DRAP slot.  */
13593 int param_ptr_offset = UNITS_PER_WORD;
13596 gcc_assert (stack_realign_drap);
13598 if (ix86_static_chain_on_stack)
13599 param_ptr_offset += UNITS_PER_WORD;
13600 if (!call_used_regs[REGNO (crtl->drap_reg)])
13601 param_ptr_offset += UNITS_PER_WORD;
13603 insn = emit_insn (gen_rtx_SET
13604 (stack_pointer_rtx,
13605 gen_rtx_PLUS (Pmode,
13607 GEN_INT (-param_ptr_offset))));
13608 m->fs.cfa_reg = stack_pointer_rtx;
13609 m->fs.cfa_offset = param_ptr_offset;
13610 m->fs.sp_offset = param_ptr_offset;
13611 m->fs.realigned = false;
13613 add_reg_note (insn, REG_CFA_DEF_CFA,
13614 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13615 GEN_INT (param_ptr_offset)));
13616 RTX_FRAME_RELATED_P (insn) = 1;
13618 if (!call_used_regs[REGNO (crtl->drap_reg)])
13619 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
13622 /* At this point the stack pointer must be valid, and we must have
13623 restored all of the registers.  We may not have deallocated the
13624 entire stack frame.  We've delayed this until now because it may
13625 be possible to merge the local stack deallocation with the
13626 deallocation forced by ix86_static_chain_on_stack.   */
13627 gcc_assert (m->fs.sp_valid);
13628 gcc_assert (!m->fs.fp_valid);
13629 gcc_assert (!m->fs.realigned);
13630 if (m->fs.sp_offset != UNITS_PER_WORD)
13632 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13633 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
13637 ix86_add_queued_cfa_restore_notes (get_last_insn ());
13639 /* Sibcall epilogues don't want a return instruction.  */
13642 m->fs = frame_state_save;
13646 if (crtl->args.pops_args && crtl->args.size)
13648 rtx popc = GEN_INT (crtl->args.pops_args);
13650 /* i386 can only pop 64K bytes.  If asked to pop more, pop return
13651 address, do explicit add, and jump indirectly to the caller.  */
13653 if (crtl->args.pops_args >= 65536)
13655 rtx ecx = gen_rtx_REG (SImode, CX_REG);
13658 /* There is no "pascal" calling convention in any 64bit ABI.  */
13659 gcc_assert (!TARGET_64BIT);
  /* Pop the return address into %ecx, adjust the stack explicitly,
     and return through an indirect jump.  */
13661 insn = emit_insn (gen_pop (ecx));
13662 m->fs.cfa_offset -= UNITS_PER_WORD;
13663 m->fs.sp_offset -= UNITS_PER_WORD;
13665 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
13666 x = gen_rtx_SET (stack_pointer_rtx, x);
13667 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
13668 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
13669 RTX_FRAME_RELATED_P (insn) = 1;
13671 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13673 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
13676 emit_jump_insn (gen_simple_return_pop_internal (popc));
13679 emit_jump_insn (gen_simple_return_internal ());
13681 /* Restore the state back to the state from the prologue,
13682 so that it's correct for the next epilogue.  */
13683 m->fs = frame_state_save;
13686 /* Reset from the function's potential modifications.  */
  /* NOTE(review): signature (FILE *, HOST_WIDE_INT) matches the
     TARGET_ASM_FUNCTION_EPILOGUE hook — confirm against the
     target-def registration, which is outside this excerpt.  */
13689 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
  /* Undo any renumbering of the PIC register pseudo performed while
     compiling this function.  */
13691 if (pic_offset_table_rtx
13692 && !ix86_use_pseudo_pic_reg ())
13693 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
13695 /* Mach-O doesn't support labels at the end of objects, so if
13696 it looks like we might want one, insert a NOP.  */
  /* Walk backwards over trailing notes to decide whether the function
     ends in a label that would need the NOP.  */
13698 rtx_insn *insn = get_last_insn ();
13699 rtx_insn *deleted_debug_label = NULL;
13702 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
13704 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
13705 notes only, instead set their CODE_LABEL_NUMBER to -1,
13706 otherwise there would be code generation differences
13707 in between -g and -g0.  */
13708 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
13709 deleted_debug_label = insn;
13710 insn = PREV_INSN (insn);
13715 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
13716 fputs ("\tnop\n", file);
13717 else if (deleted_debug_label)
13718 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
13719 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
13720 CODE_LABEL_NUMBER (insn) = -1;
13726 /* Return a scratch register to use in the split stack prologue.  The
13727 split stack prologue is used for -fsplit-stack.  It is the first
13728 instructions in the function, even before the regular prologue.
13729 The scratch register can be any caller-saved register which is not
13730 used for parameters or for the static chain.
   Returns a hard register number, or INVALID_REGNUM (after issuing a
   "sorry" diagnostic) when every candidate register is taken by the
   calling convention.  */
13732 static unsigned int
13733 split_stack_prologue_scratch_regno (void)
13739 bool is_fastcall, is_thiscall;
  /* Inspect the function's calling convention: fastcall/thiscall
     attributes and the number of regparm argument registers.  */
13742 is_fastcall = (lookup_attribute ("fastcall",
13743 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
13745 is_thiscall = (lookup_attribute ("thiscall",
13746 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
13748 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
13752 if (DECL_STATIC_CHAIN (cfun->decl))
13754 sorry ("-fsplit-stack does not support fastcall with "
13755 "nested function");
13756 return INVALID_REGNUM;
13760 else if (is_thiscall)
13762 if (!DECL_STATIC_CHAIN (cfun->decl))
13766 else if (regparm < 3)
13768 if (!DECL_STATIC_CHAIN (cfun->decl))
13774 sorry ("-fsplit-stack does not support 2 register "
13775 "parameters for a nested function");
13776 return INVALID_REGNUM;
13783 /* FIXME: We could make this work by pushing a register
13784 around the addition and comparison.  */
13785 sorry ("-fsplit-stack does not support 3 register parameters");
13786 return INVALID_REGNUM;
13791 /* A SYMBOL_REF for the function which allocates new stackspace for
   -fsplit-stack; lazily initialized to "__morestack" in
   ix86_expand_split_stack_prologue.  */
13794 static GTY(()) rtx split_stack_fn;
13796 /* A SYMBOL_REF for the more stack function when using the large
   code model; lazily initialized to "__morestack_large_model".  */
13799 static GTY(()) rtx split_stack_fn_large;
13801 /* Handle -fsplit-stack.  These are the first instructions in the
13802 function, even before the regular prologue.  */
13805 ix86_expand_split_stack_prologue (void)
13807 struct ix86_frame frame;
13808 HOST_WIDE_INT allocate;
13809 unsigned HOST_WIDE_INT args_size;
13810 rtx_code_label *label;
13811 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
13812 rtx scratch_reg = NULL_RTX;
13813 rtx_code_label *varargs_label = NULL;
13816 gcc_assert (flag_split_stack && reload_completed);
  /* ALLOCATE is the total frame size this function will need.  */
13818 ix86_finalize_stack_realign_flags ();
13819 ix86_compute_frame_layout (&frame);
13820 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
13822 /* This is the label we will branch to if we have enough stack
13823 space.  We expect the basic block reordering pass to reverse this
13824 branch if optimizing, so that we branch in the unlikely case.  */
13825 label = gen_label_rtx ();
13827 /* We need to compare the stack pointer minus the frame size with
13828 the stack boundary in the TCB.  The stack boundary always gives
13829 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
13830 can compare directly.  Otherwise we need to do an addition.  */
13832 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13833 UNSPEC_STACK_CHECK);
13834 limit = gen_rtx_CONST (Pmode, limit);
13835 limit = gen_rtx_MEM (Pmode, limit);
13836 if (allocate < SPLIT_STACK_AVAILABLE)
13837 current = stack_pointer_rtx;
13840 unsigned int scratch_regno;
13843 /* We need a scratch register to hold the stack pointer minus
13844 the required frame size.  Since this is the very start of the
13845 function, the scratch register can be any caller-saved
13846 register which is not used for parameters.  */
13847 offset = GEN_INT (- allocate);
13848 scratch_regno = split_stack_prologue_scratch_regno ();
13849 if (scratch_regno == INVALID_REGNUM)
13851 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
13852 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
13854 /* We don't use ix86_gen_add3 in this case because it will
13855 want to split to lea, but when not optimizing the insn
13856 will not be split after this point.  */
13857 emit_insn (gen_rtx_SET (scratch_reg,
13858 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
  /* Offset too wide for an immediate: materialize it first, then
     add the stack pointer.  */
13863 emit_move_insn (scratch_reg, offset);
13864 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
13865 stack_pointer_rtx));
13867 current = scratch_reg;
13870 ix86_expand_branch (GEU, current, limit, label);
13871 jump_insn = get_last_insn ();
13872 JUMP_LABEL (jump_insn) = label;
13874 /* Mark the jump as very likely to be taken.  */
13875 add_int_reg_note (jump_insn, REG_BR_PROB,
13876 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
13878 if (split_stack_fn == NULL_RTX)
13880 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
13881 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
13883 fn = split_stack_fn;
13885 /* Get more stack space.  We pass in the desired stack space and the
13886 size of the arguments to copy to the new stack.  In 32-bit mode
13887 we push the parameters; __morestack will return on a new stack
13888 anyhow.  In 64-bit mode we pass the parameters in r10 and
13890 allocate_rtx = GEN_INT (allocate);
13891 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
13892 call_fusage = NULL_RTX;
13897 reg10 = gen_rtx_REG (Pmode, R10_REG);
13898 reg11 = gen_rtx_REG (Pmode, R11_REG);
13900 /* If this function uses a static chain, it will be in %r10.
13901 Preserve it across the call to __morestack.  */
13902 if (DECL_STATIC_CHAIN (cfun->decl))
13906 rax = gen_rtx_REG (word_mode, AX_REG);
13907 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
13908 use_reg (&call_fusage, rax);
13911 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
13914 HOST_WIDE_INT argval;
13916 gcc_assert (Pmode == DImode);
13917 /* When using the large model we need to load the address
13918 into a register, and we've run out of registers.  So we
13919 switch to a different calling convention, and we call a
13920 different function: __morestack_large.  We pass the
13921 argument size in the upper 32 bits of r10 and pass the
13922 frame size in the lower 32 bits.  */
13923 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
13924 gcc_assert ((args_size & 0xffffffff) == args_size);
13926 if (split_stack_fn_large == NULL_RTX)
13928 split_stack_fn_large =
13929 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
13930 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
13932 if (ix86_cmodel == CM_LARGE_PIC)
13934 rtx_code_label *label;
  /* Large PIC model: compute the GOT base from %rip, then load the
     address of __morestack_large_model through the GOT.  */
13937 label = gen_label_rtx ();
13938 emit_label (label);
13939 LABEL_PRESERVE_P (label) = 1;
13940 emit_insn (gen_set_rip_rex64 (reg10, label));
13941 emit_insn (gen_set_got_offset_rex64 (reg11, label));
13942 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
13943 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
13945 x = gen_rtx_CONST (Pmode, x);
13946 emit_move_insn (reg11, x);
13947 x = gen_rtx_PLUS (Pmode, reg10, reg11);
13948 x = gen_const_mem (Pmode, x);
13949 emit_move_insn (reg11, x);
13952 emit_move_insn (reg11, split_stack_fn_large);
  /* Pack args_size into the upper and allocate into the lower 32 bits
     of %r10 (the double shift avoids UB for 32-bit HOST_WIDE_INT).  */
13956 argval = ((args_size << 16) << 16) + allocate;
13957 emit_move_insn (reg10, GEN_INT (argval));
13961 emit_move_insn (reg10, allocate_rtx);
13962 emit_move_insn (reg11, GEN_INT (args_size));
13963 use_reg (&call_fusage, reg11);
13966 use_reg (&call_fusage, reg10);
  /* 32-bit mode: pass both parameters on the stack.  */
13970 emit_insn (gen_push (GEN_INT (args_size)));
13971 emit_insn (gen_push (allocate_rtx));
13973 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
13974 GEN_INT (UNITS_PER_WORD), constm1_rtx,
13976 add_function_usage_to (call_insn, call_fusage);
13978 /* In order to make call/return prediction work right, we now need
13979 to execute a return instruction.  See
13980 libgcc/config/i386/morestack.S for the details on how this works.
13982 For flow purposes gcc must not see this as a return
13983 instruction--we need control flow to continue at the subsequent
13984 label.  Therefore, we use an unspec.  */
13985 gcc_assert (crtl->args.pops_args < 65536);
13986 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
13988 /* If we are in 64-bit mode and this function uses a static chain,
13989 we saved %r10 in %rax before calling __morestack.  */
13990 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
13991 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
13992 gen_rtx_REG (word_mode, AX_REG));
13994 /* If this function calls va_start, we need to store a pointer to
13995 the arguments on the old stack, because they may not have been
13996 all copied to the new stack.  At this point the old stack can be
13997 found at the frame pointer value used by __morestack, because
13998 __morestack has set that up before calling back to us.  Here we
13999 store that pointer in a scratch register, and in
14000 ix86_expand_prologue we store the scratch register in a stack
14002 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14004 unsigned int scratch_regno;
14008 scratch_regno = split_stack_prologue_scratch_regno ();
14009 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
14010 frame_reg = gen_rtx_REG (Pmode, BP_REG);
14014 return address within this function
14015 return address of caller of this function
14017 So we add three words to get to the stack arguments.
14021 return address within this function
14022 first argument to __morestack
14023 second argument to __morestack
14024 return address of caller of this function
14026 So we add five words to get to the stack arguments.
14028 words = TARGET_64BIT ? 3 : 5;
14029 emit_insn (gen_rtx_SET (scratch_reg,
14030 gen_rtx_PLUS (Pmode, frame_reg,
14031 GEN_INT (words * UNITS_PER_WORD))));
14033 varargs_label = gen_label_rtx ();
14034 emit_jump_insn (gen_jump (varargs_label));
14035 JUMP_LABEL (get_last_insn ()) = varargs_label;
  /* Fall-through path: enough stack was available and __morestack was
     not called.  */
14040 emit_label (label);
14041 LABEL_NUSES (label) = 1;
14043 /* If this function calls va_start, we now have to set the scratch
14044 register for the case where we do not call __morestack.  In this
14045 case we need to set it based on the stack pointer.  */
14046 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14048 emit_insn (gen_rtx_SET (scratch_reg,
14049 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14050 GEN_INT (UNITS_PER_WORD))));
14052 emit_label (varargs_label);
14053 LABEL_NUSES (varargs_label) = 1;
14057 /* We may have to tell the dataflow pass that the split stack prologue
14058 is initializing a scratch register.  */
14061 ix86_live_on_entry (bitmap regs)
  /* Only needed when the split-stack prologue stores the varargs
     pointer: mark its scratch register live on entry so dataflow does
     not consider the prologue's set of it dead.  */
14063 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14065 gcc_assert (flag_split_stack);
14066 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
14070 /* Extract the parts of an RTL expression that is a valid memory address
14071 for an instruction.  Return 0 if the structure of the address is
14072 grossly off.  Return -1 if the address contains ASHIFT, so it is not
14073 strictly valid, but still used for computing length of lea instruction.
   On success the base/index/disp/scale/segment parts are stored
   through OUT.  */
14076 ix86_decompose_address (rtx addr, struct ix86_address *out)
14078 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
14079 rtx base_reg, index_reg;
14080 HOST_WIDE_INT scale = 1;
14081 rtx scale_rtx = NULL_RTX;
14084 addr_space_t seg = ADDR_SPACE_GENERIC;
14086 /* Allow zero-extended SImode addresses,
14087 they will be emitted with addr32 prefix.  */
14088 if (TARGET_64BIT && GET_MODE (addr) == DImode)
14090 if (GET_CODE (addr) == ZERO_EXTEND
14091 && GET_MODE (XEXP (addr, 0)) == SImode)
14093 addr = XEXP (addr, 0);
14094 if (CONST_INT_P (addr))
14097 else if (GET_CODE (addr) == AND
14098 && const_32bit_mask (XEXP (addr, 1), DImode))
  /* (and:DI addr 0xffffffff) is equivalent to a zero-extend of the
     low SImode part; strip the mask via a subreg.  */
14100 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
14101 if (addr == NULL_RTX)
14104 if (CONST_INT_P (addr))
14109 /* Allow SImode subregs of DImode addresses,
14110 they will be emitted with addr32 prefix.  */
14111 if (TARGET_64BIT && GET_MODE (addr) == SImode)
14113 if (SUBREG_P (addr)
14114 && GET_MODE (SUBREG_REG (addr)) == DImode)
14116 addr = SUBREG_REG (addr);
14117 if (CONST_INT_P (addr))
  /* Main dispatch on the top-level RTX code of the address.  */
14124 else if (SUBREG_P (addr))
14126 if (REG_P (SUBREG_REG (addr)))
14131 else if (GET_CODE (addr) == PLUS)
  /* Flatten a (possibly nested) PLUS tree into up to four addends,
     then classify each addend as base, index*scale, segment UNSPEC,
     or displacement.  */
14133 rtx addends[4], op;
14141 addends[n++] = XEXP (op, 1);
14144 while (GET_CODE (op) == PLUS);
14149 for (i = n; i >= 0; --i)
14152 switch (GET_CODE (op))
14157 index = XEXP (op, 0);
14158 scale_rtx = XEXP (op, 1);
14164 index = XEXP (op, 0);
14165 tmp = XEXP (op, 1);
14166 if (!CONST_INT_P (tmp))
14168 scale = INTVAL (tmp);
14169 if ((unsigned HOST_WIDE_INT) scale > 3)
14171 scale = 1 << scale;
14176 if (GET_CODE (op) != UNSPEC)
  /* A UNSPEC_TP addend selects the TLS segment register.  */
14181 if (XINT (op, 1) == UNSPEC_TP
14182 && TARGET_TLS_DIRECT_SEG_REFS
14183 && seg == ADDR_SPACE_GENERIC)
14184 seg = DEFAULT_TLS_SEG_REG;
14190 if (!REG_P (SUBREG_REG (op)))
14217 else if (GET_CODE (addr) == MULT)
14219 index = XEXP (addr, 0); /* index*scale */
14220 scale_rtx = XEXP (addr, 1);
14222 else if (GET_CODE (addr) == ASHIFT)
14224 /* We're called for lea too, which implements ashift on occasion.  */
14225 index = XEXP (addr, 0);
14226 tmp = XEXP (addr, 1);
14227 if (!CONST_INT_P (tmp))
14229 scale = INTVAL (tmp);
14230 if ((unsigned HOST_WIDE_INT) scale > 3)
14232 scale = 1 << scale;
14236 disp = addr; /* displacement */
14242 else if (SUBREG_P (index)
14243 && REG_P (SUBREG_REG (index)))
14249 /* Extract the integral value of scale.  */
14252 if (!CONST_INT_P (scale_rtx))
14254 scale = INTVAL (scale_rtx);
  /* Canonicalize and validate the decomposed parts.  */
14257 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
14258 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
14260 /* Avoid useless 0 displacement.  */
14261 if (disp == const0_rtx && (base || index))
14264 /* Allow arg pointer and stack pointer as index if there is no scaling.  */
14265 if (base_reg && index_reg && scale == 1
14266 && (index_reg == arg_pointer_rtx
14267 || index_reg == frame_pointer_rtx
14268 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
14270 std::swap (base, index);
14271 std::swap (base_reg, index_reg);
14274 /* Special case: %ebp cannot be encoded as a base without a displacement.
14278 && (base_reg == hard_frame_pointer_rtx
14279 || base_reg == frame_pointer_rtx
14280 || base_reg == arg_pointer_rtx
14281 || (REG_P (base_reg)
14282 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
14283 || REGNO (base_reg) == R13_REG))))
14286 /* Special case: on K6, [%esi] makes the instruction vector decoded.
14287 Avoid this by transforming to [%esi+0].
14288 Reload calls address legitimization without cfun defined, so we need
14289 to test cfun for being non-NULL.  */
14290 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
14291 && base_reg && !index_reg && !disp
14292 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
14295 /* Special case: encode reg+reg instead of reg*2.  */
14296 if (!base && index && scale == 2)
14297 base = index, base_reg = index_reg, scale = 1;
14299 /* Special case: scaling cannot be encoded without base or displacement.  */
14300 if (!base && !disp && index && scale != 1)
14304 out->index = index;
14306 out->scale = scale;
14312 /* Return cost of the memory address x.
14313 For i386, it is better to use a complex address than let gcc copy
14314 the address into a reg and make a new pseudo. But not if the address
14315 requires to two regs - that would mean more pseudos with longer
/* NOTE(review): elided listing -- the embedded numbers are original line
   numbers and many intervening lines are missing; code kept verbatim.  */
14318 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
14320 struct ix86_address parts;
14322 int ok = ix86_decompose_address (x, &parts);
/* Strip SUBREGs so the REGNO checks below look at the underlying reg.  */
14326 if (parts.base && SUBREG_P (parts.base))
14327 parts.base = SUBREG_REG (parts.base);
14328 if (parts.index && SUBREG_P (parts.index))
14329 parts.index = SUBREG_REG (parts.index);
14331 /* Attempt to minimize number of registers in the address by increasing
14332 address cost for each used register. We don't increase address cost
14333 for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx"
14334 is not invariant itself it most likely means that base or index is not
14335 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
14336 which is not profitable for x86. */
/* Hard registers (REGNO < FIRST_PSEUDO_REGISTER) are excluded from the
   per-register penalty; the PIC register is also excluded unless we are
   still in a GIMPLE pass.  */
14338 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
14339 && (current_pass->type == GIMPLE_PASS
14340 || !pic_offset_table_rtx
14341 || !REG_P (parts.base)
14342 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
14346 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
14347 && (current_pass->type == GIMPLE_PASS
14348 || !pic_offset_table_rtx
14349 || !REG_P (parts.index)
14350 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
14353 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
14354 since it's predecode logic can't detect the length of instructions
14355 and it degenerates to vector decoded. Increase cost of such
14356 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
14357 to split such addresses or even refuse such addresses at all.
14359 Following addressing modes are affected:
14364 The first and last case may be avoidable by explicitly coding the zero in
14365 memory address, but I don't have AMD-K6 machine handy to check this
/* The three affected SIB shapes: scaled index w/o disp, scaled index w/o
   base, and base+index w/o disp.  */
14369 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
14370 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
14371 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
14377 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
14378 this is used for to form addresses to local data when -fPIC is in
/* NOTE(review): elided listing; braces and some lines are missing from
   view.  Predicate: true iff DISP is the Mach-O pic-base-offset unspec.  */
14382 darwin_local_data_pic (rtx disp)
14384 return (GET_CODE (disp) == UNSPEC
14385 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
14388 /* Determine if a given RTX is a valid constant. We already know this
14389 satisfies CONSTANT_P. */
/* NOTE(review): elided listing -- switch labels and fall-through returns
   between the numbered lines are missing; code kept verbatim.  */
14392 ix86_legitimate_constant_p (machine_mode, rtx x)
14394 /* Pointer bounds constants are not valid. */
14395 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
14398 switch (GET_CODE (x))
/* CONST case: peel off an optional integer offset, then classify the
   remaining symbolic part.  */
14403 if (GET_CODE (x) == PLUS)
14405 if (!CONST_INT_P (XEXP (x, 1)))
14410 if (TARGET_MACHO && darwin_local_data_pic (x))
14413 /* Only some unspecs are valid as "constants". */
14414 if (GET_CODE (x) == UNSPEC)
14415 switch (XINT (x, 1))
14418 case UNSPEC_GOTOFF:
14419 case UNSPEC_PLTOFF:
14420 return TARGET_64BIT;
/* TLS offset unspecs are constants only when the wrapped symbol has the
   matching TLS model.  */
14422 case UNSPEC_NTPOFF:
14423 x = XVECEXP (x, 0, 0);
14424 return (GET_CODE (x) == SYMBOL_REF
14425 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
14426 case UNSPEC_DTPOFF:
14427 x = XVECEXP (x, 0, 0);
14428 return (GET_CODE (x) == SYMBOL_REF
14429 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
14434 /* We must have drilled down to a symbol. */
14435 if (GET_CODE (x) == LABEL_REF)
14437 if (GET_CODE (x) != SYMBOL_REF)
14442 /* TLS symbols are never valid. */
14443 if (SYMBOL_REF_TLS_MODEL (x))
14446 /* DLLIMPORT symbols are never valid. */
14447 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14448 && SYMBOL_REF_DLLIMPORT_P (x))
14452 /* mdynamic-no-pic */
14453 if (MACHO_DYNAMIC_NO_PIC_P)
14454 return machopic_symbol_defined_p (x);
/* Wide-int / vector constants: only the SSE-encodable ones pass here;
   surrounding cases are elided from this view.  */
14458 case CONST_WIDE_INT:
14459 if (!TARGET_64BIT && !standard_sse_constant_p (x))
14464 if (!standard_sse_constant_p (x))
14471 /* Otherwise we handle everything else in the move patterns. */
14475 /* Determine if it's legal to put X into the constant pool. This
14476 is not possible for the address of thread-local symbols, which
14477 is checked above. */
/* NOTE(review): elided listing; the case bodies between the numbered
   lines are missing.  Integral constants/vectors return false (may go to
   memory); everything else defers to ix86_legitimate_constant_p.  */
14480 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
14482 /* We can always put integral constants and vectors in memory. */
14483 switch (GET_CODE (x))
14486 case CONST_WIDE_INT:
14494 return !ix86_legitimate_constant_p (mode, x);
14497 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
/* NOTE(review): elided listing; braces missing from view.  Only
   meaningful when dllimport decl attributes are in effect and X is a
   SYMBOL_REF; otherwise the early exit (elided) applies.  */
14501 is_imported_p (rtx x)
14503 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
14504 || GET_CODE (x) != SYMBOL_REF)
14507 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
14511 /* Nonzero if the constant value X is a legitimate general operand
14512 when generating PIC code. It is given that flag_pic is on and
14513 that X satisfies CONSTANT_P. */
/* NOTE(review): elided listing -- switch labels and default returns
   between the numbered lines are missing; code kept verbatim.  */
14516 legitimate_pic_operand_p (rtx x)
14520 switch (GET_CODE (x))
/* CONST case: strip a constant integer offset before classifying.  */
14523 inner = XEXP (x, 0);
14524 if (GET_CODE (inner) == PLUS
14525 && CONST_INT_P (XEXP (inner, 1)))
14526 inner = XEXP (inner, 0);
14528 /* Only some unspecs are valid as "constants". */
14529 if (GET_CODE (inner) == UNSPEC)
14530 switch (XINT (inner, 1))
14533 case UNSPEC_GOTOFF:
14534 case UNSPEC_PLTOFF:
14535 return TARGET_64BIT;
/* Presumably the UNSPEC_NTPOFF (or similar TLS) case -- its label is
   elided from this view.  */
14537 x = XVECEXP (inner, 0, 0);
14538 return (GET_CODE (x) == SYMBOL_REF
14539 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
14540 case UNSPEC_MACHOPIC_OFFSET:
14541 return legitimate_pic_address_disp_p (x);
14549 return legitimate_pic_address_disp_p (x);
14556 /* Determine if a given CONST RTX is a valid memory displacement
/* NOTE(review): elided listing -- braces, returns and several case
   labels between the numbered lines are missing; code kept verbatim.  */
14560 legitimate_pic_address_disp_p (rtx disp)
14564 /* In 64bit mode we can allow direct addresses of symbols and labels
14565 when they are not dynamic symbols. */
14568 rtx op0 = disp, op1;
14570 switch (GET_CODE (disp))
/* CONST(PLUS(sym, off)) case: offset must be a CONST_INT within +/-16MB
   so a 32-bit PC-relative relocation can reach it.  */
14576 if (GET_CODE (XEXP (disp, 0)) != PLUS)
14578 op0 = XEXP (XEXP (disp, 0), 0);
14579 op1 = XEXP (XEXP (disp, 0), 1);
14580 if (!CONST_INT_P (op1)
14581 || INTVAL (op1) >= 16*1024*1024
14582 || INTVAL (op1) < -16*1024*1024)
14584 if (GET_CODE (op0) == LABEL_REF)
14586 if (GET_CODE (op0) == CONST
14587 && GET_CODE (XEXP (op0, 0)) == UNSPEC
14588 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
14590 if (GET_CODE (op0) == UNSPEC
14591 && XINT (op0, 1) == UNSPEC_PCREL)
14593 if (GET_CODE (op0) != SYMBOL_REF)
14598 /* TLS references should always be enclosed in UNSPEC.
14599 The dllimported symbol needs always to be resolved. */
14600 if (SYMBOL_REF_TLS_MODEL (op0)
14601 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
/* PE-COFF (elided guard presumably) symbol classification.  */
14606 if (is_imported_p (op0))
14609 if (SYMBOL_REF_FAR_ADDR_P (op0)
14610 || !SYMBOL_REF_LOCAL_P (op0))
14613 /* Function-symbols need to be resolved only for
14615 For the small-model we don't need to resolve anything
14617 if ((ix86_cmodel != CM_LARGE_PIC
14618 && SYMBOL_REF_FUNCTION_P (op0))
14619 || ix86_cmodel == CM_SMALL_PIC)
14621 /* Non-external symbols don't need to be resolved for
14622 large, and medium-model. */
14623 if ((ix86_cmodel == CM_LARGE_PIC
14624 || ix86_cmodel == CM_MEDIUM_PIC)
14625 && !SYMBOL_REF_EXTERNAL_P (op0))
/* Non-PECOFF path: local (or PIE copy-reloc'able) symbols are directly
   addressable outside the large model.  */
14628 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
14629 && (SYMBOL_REF_LOCAL_P (op0)
14630 || (HAVE_LD_PIE_COPYRELOC
14632 && !SYMBOL_REF_WEAK (op0)
14633 && !SYMBOL_REF_FUNCTION_P (op0)))
14634 && ix86_cmodel != CM_LARGE_PIC)
14642 if (GET_CODE (disp) != CONST)
14644 disp = XEXP (disp, 0);
/* 64-bit: only a bare GOT/PLT-relative unspec of a symbol or label is a
   valid PIC displacement.  */
14648 /* We are unsafe to allow PLUS expressions. This limit allowed distance
14649 of GOT tables. We should not need these anyway. */
14650 if (GET_CODE (disp) != UNSPEC
14651 || (XINT (disp, 1) != UNSPEC_GOTPCREL
14652 && XINT (disp, 1) != UNSPEC_GOTOFF
14653 && XINT (disp, 1) != UNSPEC_PCREL
14654 && XINT (disp, 1) != UNSPEC_PLTOFF))
14657 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
14658 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit path: allow unspec plus a constant offset.  */
14664 if (GET_CODE (disp) == PLUS)
14666 if (!CONST_INT_P (XEXP (disp, 1)))
14668 disp = XEXP (disp, 0);
14672 if (TARGET_MACHO && darwin_local_data_pic (disp))
14675 if (GET_CODE (disp) != UNSPEC)
14678 switch (XINT (disp, 1))
14683 /* We need to check for both symbols and labels because VxWorks loads
14684 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
14686 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
14687 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
14688 case UNSPEC_GOTOFF:
14689 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
14690 While ABI specify also 32bit relocation but we don't produce it in
14691 small PIC model at all. */
14692 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
14693 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
14695 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
/* TLS unspecs: valid only when the wrapped symbol carries the matching
   TLS access model.  */
14697 case UNSPEC_GOTTPOFF:
14698 case UNSPEC_GOTNTPOFF:
14699 case UNSPEC_INDNTPOFF:
14702 disp = XVECEXP (disp, 0, 0);
14703 return (GET_CODE (disp) == SYMBOL_REF
14704 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
14705 case UNSPEC_NTPOFF:
14706 disp = XVECEXP (disp, 0, 0);
14707 return (GET_CODE (disp) == SYMBOL_REF
14708 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
14709 case UNSPEC_DTPOFF:
14710 disp = XVECEXP (disp, 0, 0);
14711 return (GET_CODE (disp) == SYMBOL_REF
14712 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
14718 /* Determine if op is suitable RTX for an address register.
14719 Return naked register if a register or a register subreg is
14720 found, otherwise return NULL_RTX. */
/* NOTE(review): elided listing; the REG_P branch and returns are
   missing between the numbered lines.  */
14723 ix86_validate_address_register (rtx op)
14725 machine_mode mode = GET_MODE (op);
14727 /* Only SImode or DImode registers can form the address. */
14728 if (mode != SImode && mode != DImode)
14733 else if (SUBREG_P (op))
14735 rtx reg = SUBREG_REG (op);
14740 mode = GET_MODE (reg);
14742 /* Don't allow SUBREGs that span more than a word. It can
14743 lead to spill failures when the register is one word out
14744 of a two word structure. */
14745 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
14748 /* Allow only SUBREGs of non-eliminable hard registers. */
14749 if (register_no_elim_operand (reg, mode))
14753 /* Op is not a register. */
14757 /* Recognizes RTL expressions that are valid memory addresses for an
14758 instruction. The MODE argument is the machine mode for the MEM
14759 expression that wants to use this address.
14761 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
14762 convert common non-canonical forms to canonical form so that they will
/* NOTE(review): elided listing -- many `return false` / brace lines are
   missing between the numbered lines; code kept verbatim.  */
14766 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
14768 struct ix86_address parts;
14769 rtx base, index, disp;
14770 HOST_WIDE_INT scale;
14773 if (ix86_decompose_address (addr, &parts) <= 0)
14774 /* Decomposition failed. */
14778 index = parts.index;
14780 scale = parts.scale;
14783 /* Validate base register. */
14786 rtx reg = ix86_validate_address_register (base);
14788 if (reg == NULL_RTX)
14791 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
14792 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
14793 /* Base is not valid. */
14797 /* Validate index register. */
14800 rtx reg = ix86_validate_address_register (index);
14802 if (reg == NULL_RTX)
14805 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
14806 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
14807 /* Index is not valid. */
14811 /* Index and base should have the same mode. */
14813 && GET_MODE (base) != GET_MODE (index))
14816 /* Address override works only on the (%reg) part of %fs:(%reg). */
14817 if (seg != ADDR_SPACE_GENERIC
14818 && ((base && GET_MODE (base) != word_mode)
14819 || (index && GET_MODE (index) != word_mode)))
14822 /* Validate scale factor. */
14826 /* Scale without index. */
14829 if (scale != 2 && scale != 4 && scale != 8)
14830 /* Scale is not a valid multiplier. */
14834 /* Validate displacement. */
14837 if (GET_CODE (disp) == CONST
14838 && GET_CODE (XEXP (disp, 0)) == UNSPEC
14839 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
14840 switch (XINT (XEXP (disp, 0), 1))
14842 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
14843 used. While ABI specify also 32bit relocations, we don't produce
14844 them at all and use IP relative instead. */
14846 case UNSPEC_GOTOFF:
14847 gcc_assert (flag_pic);
14849 goto is_legitimate_pic;
14851 /* 64bit address unspec. */
14854 case UNSPEC_GOTPCREL:
14856 gcc_assert (flag_pic);
14857 goto is_legitimate_pic;
14859 case UNSPEC_GOTTPOFF:
14860 case UNSPEC_GOTNTPOFF:
14861 case UNSPEC_INDNTPOFF:
14862 case UNSPEC_NTPOFF:
14863 case UNSPEC_DTPOFF:
14866 case UNSPEC_STACK_CHECK:
14867 gcc_assert (flag_split_stack);
14871 /* Invalid address unspec. */
/* PIC / Mach-O displacement validation; label `is_legitimate_pic`
   presumably lands in this region (elided from view).  */
14875 else if (SYMBOLIC_CONST (disp)
14879 && MACHOPIC_INDIRECT
14880 && !machopic_operand_p (disp)
14886 if (TARGET_64BIT && (index || base))
14888 /* foo@dtpoff(%rX) is ok. */
14889 if (GET_CODE (disp) != CONST
14890 || GET_CODE (XEXP (disp, 0)) != PLUS
14891 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
14892 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
14893 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
14894 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
14895 /* Non-constant pic memory reference. */
14898 else if ((!TARGET_MACHO || flag_pic)
14899 && ! legitimate_pic_address_disp_p (disp))
14900 /* Displacement is an invalid pic construct. */
14903 else if (MACHO_DYNAMIC_NO_PIC_P
14904 && !ix86_legitimate_constant_p (Pmode, disp))
14905 /* displacment must be referenced via non_lazy_pointer */
14909 /* This code used to verify that a symbolic pic displacement
14910 includes the pic_offset_table_rtx register.
14912 While this is good idea, unfortunately these constructs may
14913 be created by "adds using lea" optimization for incorrect
14922 This code is nonsensical, but results in addressing
14923 GOT table with pic_offset_table_rtx base. We can't
14924 just refuse it easily, since it gets matched by
14925 "addsi3" pattern, that later gets split to lea in the
14926 case output register differs from input. While this
14927 can be handled by separate addsi pattern for this case
14928 that never results in lea, this seems to be easier and
14929 correct fix for crash to disable this test. */
14931 else if (GET_CODE (disp) != LABEL_REF
14932 && !CONST_INT_P (disp)
14933 && (GET_CODE (disp) != CONST
14934 || !ix86_legitimate_constant_p (Pmode, disp))
14935 && (GET_CODE (disp) != SYMBOL_REF
14936 || !ix86_legitimate_constant_p (Pmode, disp)))
14937 /* Displacement is not constant. */
14939 else if (TARGET_64BIT
14940 && !x86_64_immediate_operand (disp, VOIDmode))
14941 /* Displacement is out of range. */
14943 /* In x32 mode, constant addresses are sign extended to 64bit, so
14944 we have to prevent addresses from 0x80000000 to 0xffffffff. */
14945 else if (TARGET_X32 && !(index || base)
14946 && CONST_INT_P (disp)
14947 && val_signbit_known_set_p (SImode, INTVAL (disp)))
14951 /* Everything looks valid. */
14955 /* Determine if a given RTX is a valid constant address. */
/* NOTE(review): elided listing; braces missing.  Strict-checking
   (third arg 1) legitimacy of a CONSTANT_P address in Pmode.  */
14958 constant_address_p (rtx x)
14960 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
14963 /* Return a unique alias set for the GOT. */
/* NOTE(review): elided listing; the `if (set == -1)` guard and return
   are presumably in the missing lines -- the static cache makes the set
   unique per process.  */
14965 static alias_set_type
14966 ix86_GOT_alias_set (void)
14968 static alias_set_type set = -1;
14970 set = new_alias_set ();
14974 /* Return a legitimate reference for ORIG (an address) using the
14975 register REG. If REG is 0, a new pseudo is generated.
14977 There are two types of references that must be handled:
14979 1. Global data references must load the address from the GOT, via
14980 the PIC reg. An insn is emitted to do this load, and the reg is
14983 2. Static data references, constant pool addresses, and code labels
14984 compute the address as an offset from the GOT, whose base is in
14985 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
14986 differentiate them from global data objects. The returned
14987 address is the PIC reg + an unspec constant.
14989 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
14990 reg also appears in the address. */
/* NOTE(review): elided listing -- braces, `addr` initialization and many
   intermediate lines are missing; code kept verbatim.  */
14993 legitimize_pic_address (rtx orig, rtx reg)
14996 rtx new_rtx = orig;
/* 32-bit Mach-O defers entirely to the generic Darwin PIC machinery.  */
14999 if (TARGET_MACHO && !TARGET_64BIT)
15002 reg = gen_reg_rtx (Pmode);
15003 /* Use the generic Mach-O PIC machinery. */
15004 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
15008 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
15010 rtx tmp = legitimize_pe_coff_symbol (addr, true);
15015 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
15017 else if (TARGET_64BIT && !TARGET_PECOFF
15018 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
15021 /* This symbol may be referenced via a displacement from the PIC
15022 base address (@GOTOFF). */
15024 if (GET_CODE (addr) == CONST)
15025 addr = XEXP (addr, 0);
15026 if (GET_CODE (addr) == PLUS)
15028 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
15030 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
15033 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
15034 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15036 tmpreg = gen_reg_rtx (Pmode);
15039 emit_move_insn (tmpreg, new_rtx);
15043 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
15044 tmpreg, 1, OPTAB_DIRECT);
15048 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
/* 32-bit @GOTOFF path: same unspec construction, but the result is the
   plain PIC-reg + offset expression.  */
15050 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
15052 /* This symbol may be referenced via a displacement from the PIC
15053 base address (@GOTOFF). */
15055 if (GET_CODE (addr) == CONST)
15056 addr = XEXP (addr, 0);
15057 if (GET_CODE (addr) == PLUS)
15059 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
15061 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
15064 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
15065 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15066 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15070 emit_move_insn (reg, new_rtx);
/* Non-TLS symbols (and VxWorks text labels) go through the GOT.  */
15074 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
15075 /* We can't use @GOTOFF for text labels on VxWorks;
15076 see gotoff_operand. */
15077 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
15079 rtx tmp = legitimize_pe_coff_symbol (addr, true);
15083 /* For x64 PE-COFF there is no GOT table. So we use address
15085 if (TARGET_64BIT && TARGET_PECOFF)
15087 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
15088 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15091 reg = gen_reg_rtx (Pmode);
15092 emit_move_insn (reg, new_rtx);
15095 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
15097 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
15098 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15099 new_rtx = gen_const_mem (Pmode, new_rtx);
15100 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
15103 reg = gen_reg_rtx (Pmode);
15104 /* Use directly gen_movsi, otherwise the address is loaded
15105 into register for CSE. We don't want to CSE this addresses,
15106 instead we CSE addresses from the GOT table, so skip this. */
15107 emit_insn (gen_movsi (reg, new_rtx));
15112 /* This symbol must be referenced via a load from the
15113 Global Offset Table (@GOT). */
15115 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
15116 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15118 new_rtx = force_reg (Pmode, new_rtx);
15119 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15120 new_rtx = gen_const_mem (Pmode, new_rtx);
15121 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
15124 reg = gen_reg_rtx (Pmode);
15125 emit_move_insn (reg, new_rtx);
/* Fallback: non-symbolic or composite addresses.  */
15131 if (CONST_INT_P (addr)
15132 && !x86_64_immediate_operand (addr, VOIDmode))
15136 emit_move_insn (reg, addr);
15140 new_rtx = force_reg (Pmode, addr);
15142 else if (GET_CODE (addr) == CONST)
15144 addr = XEXP (addr, 0);
15146 /* We must match stuff we generate before. Assume the only
15147 unspecs that can get here are ours. Not that we could do
15148 anything with them anyway.... */
15149 if (GET_CODE (addr) == UNSPEC
15150 || (GET_CODE (addr) == PLUS
15151 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
15153 gcc_assert (GET_CODE (addr) == PLUS);
15155 if (GET_CODE (addr) == PLUS)
15157 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
15159 /* Check first to see if this is a constant offset from a @GOTOFF
15160 symbol reference. */
15161 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
15162 && CONST_INT_P (op1))
15166 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
15168 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
15169 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15170 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15174 emit_move_insn (reg, new_rtx);
/* Large offsets (beyond +/-16MB) must live in a register.  */
15180 if (INTVAL (op1) < -16*1024*1024
15181 || INTVAL (op1) >= 16*1024*1024)
15183 if (!x86_64_immediate_operand (op1, Pmode))
15184 op1 = force_reg (Pmode, op1);
15185 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize both halves recursively and recombine.  */
15191 rtx base = legitimize_pic_address (op0, reg);
15192 machine_mode mode = GET_MODE (base);
15194 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
15196 if (CONST_INT_P (new_rtx))
15198 if (INTVAL (new_rtx) < -16*1024*1024
15199 || INTVAL (new_rtx) >= 16*1024*1024)
15201 if (!x86_64_immediate_operand (new_rtx, mode))
15202 new_rtx = force_reg (mode, new_rtx);
15204 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
15207 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
15211 /* For %rip addressing, we have to use just disp32, not
15214 && (GET_CODE (base) == SYMBOL_REF
15215 || GET_CODE (base) == LABEL_REF))
15216 base = force_reg (mode, base);
15217 if (GET_CODE (new_rtx) == PLUS
15218 && CONSTANT_P (XEXP (new_rtx, 1)))
15220 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
15221 new_rtx = XEXP (new_rtx, 1);
15223 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
15231 /* Load the thread pointer. If TO_REG is true, force it into a register. */
/* NOTE(review): elided listing.  The thread pointer is an UNSPEC_TP in
   ptr_mode; under x32 (SImode TP, DImode request) it is zero-extended.  */
15234 get_thread_pointer (machine_mode tp_mode, bool to_reg)
15236 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
15238 if (GET_MODE (tp) != tp_mode)
15240 gcc_assert (GET_MODE (tp) == SImode);
15241 gcc_assert (tp_mode == DImode);
15243 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
15247 tp = copy_to_mode_reg (tp_mode, tp);
15252 /* Construct the SYMBOL_REF for the tls_get_addr function. */
/* Lazily built and cached in GC-rooted static storage.  */
15254 static GTY(()) rtx ix86_tls_symbol;
15257 ix86_tls_get_addr (void)
15259 if (!ix86_tls_symbol)
/* 32-bit GNU TLS uses the triple-underscore entry point.  */
15262 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
15263 ? "___tls_get_addr" : "__tls_get_addr");
15265 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
/* Large PIC model cannot reach the symbol directly; address it via the
   PIC register plus a PLTOFF-style unspec (elided from view).  */
15268 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
15270 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
15272 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
15273 gen_rtx_CONST (Pmode, unspec));
15276 return ix86_tls_symbol;
15279 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15281 static GTY(()) rtx ix86_tls_module_base_symbol;
15284 ix86_tls_module_base (void)
/* Lazily create the magic linker symbol and tag it as global-dynamic
   TLS so later code treats it as a TLS reference.  */
15286 if (!ix86_tls_module_base_symbol)
15288 ix86_tls_module_base_symbol
15289 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
15291 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15292 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15295 return ix86_tls_module_base_symbol;
15298 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
15299 false if we expect this to be used for a memory address and true if
15300 we expect to load the address into a register. */
/* NOTE(review): elided listing -- braces, some case labels and returns
   between the numbered lines are missing; code kept verbatim.  */
15303 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
15305 rtx dest, base, off;
15306 rtx pic = NULL_RTX, tp = NULL_RTX;
15307 machine_mode tp_mode = Pmode;
15310 /* Fall back to global dynamic model if tool chain cannot support local
15312 if (TARGET_SUN_TLS && !TARGET_64BIT
15313 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
15314 && model == TLS_MODEL_LOCAL_DYNAMIC)
15315 model = TLS_MODEL_GLOBAL_DYNAMIC;
15319 case TLS_MODEL_GLOBAL_DYNAMIC:
15320 dest = gen_reg_rtx (Pmode);
/* Obtain a PIC register (existing PIC reg, or a fresh set_got) for the
   32-bit call sequence.  */
15324 if (flag_pic && !TARGET_PECOFF)
15325 pic = pic_offset_table_rtx;
15328 pic = gen_reg_rtx (Pmode);
15329 emit_insn (gen_set_got (pic));
15333 if (TARGET_GNU2_TLS)
15336 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
15338 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
15340 tp = get_thread_pointer (Pmode, true);
15341 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
15343 if (GET_MODE (x) != Pmode)
15344 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15346 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
/* Classic GNU TLS: emit the __tls_get_addr call sequence.  */
15350 rtx caddr = ix86_tls_get_addr ();
15354 rtx rax = gen_rtx_REG (Pmode, AX_REG);
15359 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
15360 insns = get_insns ();
15363 if (GET_MODE (x) != Pmode)
15364 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15366 RTL_CONST_CALL_P (insns) = 1;
15367 emit_libcall_block (insns, dest, rax, x);
15370 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
15374 case TLS_MODEL_LOCAL_DYNAMIC:
15375 base = gen_reg_rtx (Pmode);
15380 pic = pic_offset_table_rtx;
15383 pic = gen_reg_rtx (Pmode);
15384 emit_insn (gen_set_got (pic));
15388 if (TARGET_GNU2_TLS)
15390 rtx tmp = ix86_tls_module_base ();
15393 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
15395 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
15397 tp = get_thread_pointer (Pmode, true);
15398 set_unique_reg_note (get_last_insn (), REG_EQUAL,
15399 gen_rtx_MINUS (Pmode, tmp, tp));
15403 rtx caddr = ix86_tls_get_addr ();
15407 rtx rax = gen_rtx_REG (Pmode, AX_REG);
15413 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
15414 insns = get_insns ();
15417 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
15418 share the LD_BASE result with other LD model accesses. */
15419 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
15420 UNSPEC_TLS_LD_BASE);
15422 RTL_CONST_CALL_P (insns) = 1;
15423 emit_libcall_block (insns, base, rax, eqv);
15426 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
/* Add the symbol's DTPOFF offset to the module base.  */
15429 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
15430 off = gen_rtx_CONST (Pmode, off);
15432 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
15434 if (TARGET_GNU2_TLS)
15436 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
15438 if (GET_MODE (x) != Pmode)
15439 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15441 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
15445 case TLS_MODEL_INITIAL_EXEC:
15448 if (TARGET_SUN_TLS && !TARGET_X32)
15450 /* The Sun linker took the AMD64 TLS spec literally
15451 and can only handle %rax as destination of the
15452 initial executable code sequence. */
15454 dest = gen_reg_rtx (DImode);
15455 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
15459 /* Generate DImode references to avoid %fs:(%reg32)
15460 problems and linker IE->LE relaxation bug. */
15463 type = UNSPEC_GOTNTPOFF;
15467 pic = pic_offset_table_rtx;
15468 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
15470 else if (!TARGET_ANY_GNU_TLS)
15472 pic = gen_reg_rtx (Pmode);
15473 emit_insn (gen_set_got (pic));
15474 type = UNSPEC_GOTTPOFF;
15479 type = UNSPEC_INDNTPOFF;
/* Build the GOT load of the TP offset.  */
15482 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
15483 off = gen_rtx_CONST (tp_mode, off);
15485 off = gen_rtx_PLUS (tp_mode, pic, off);
15486 off = gen_const_mem (tp_mode, off);
15487 set_mem_alias_set (off, ix86_GOT_alias_set ());
15489 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
15491 base = get_thread_pointer (tp_mode,
15492 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
15493 off = force_reg (tp_mode, off);
15494 return gen_rtx_PLUS (tp_mode, base, off);
15498 base = get_thread_pointer (Pmode, true);
15499 dest = gen_reg_rtx (Pmode);
15500 emit_insn (ix86_gen_sub3 (dest, base, off));
15504 case TLS_MODEL_LOCAL_EXEC:
15505 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
15506 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
15507 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
15508 off = gen_rtx_CONST (Pmode, off);
15510 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
15512 base = get_thread_pointer (Pmode,
15513 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
15514 return gen_rtx_PLUS (Pmode, base, off);
15518 base = get_thread_pointer (Pmode, true);
15519 dest = gen_reg_rtx (Pmode);
15520 emit_insn (ix86_gen_sub3 (dest, base, off));
15525 gcc_unreachable ();
15531 /* Create or return the unique __imp_DECL dllimport symbol corresponding
15532 to symbol DECL if BEIMPORT is true. Otherwise create or return the
15533 unique refptr-DECL symbol corresponding to symbol DECL. */
/* Hash traits for the decl -> dllimport-stub map: keyed by the source
   decl pointer, GC-cached (entries dropped when the decl dies).  */
15535 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
15537 static inline hashval_t hash (tree_map *m) { return m->hash; }
15539 equal (tree_map *a, tree_map *b)
15541 return a->base.from == b->base.from;
15545 keep_cache_entry (tree_map *&m)
15547 return ggc_marked_p (m->base.from);
15551 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
/* NOTE(review): elided listing -- braces and some declarations are
   missing between the numbered lines; code kept verbatim.  Returns the
   cached or freshly built __imp_/refptr. VAR_DECL for DECL.  */
15554 get_dllimport_decl (tree decl, bool beimport)
15556 struct tree_map *h, in;
15558 const char *prefix;
15559 size_t namelen, prefixlen;
15564 if (!dllimport_map)
15565 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
15567 in.hash = htab_hash_pointer (decl);
15568 in.base.from = decl;
15569 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
/* Cache miss: build an artificial external read-only pointer decl.  */
15574 *loc = h = ggc_alloc<tree_map> ();
15576 h->base.from = decl;
15577 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
15578 VAR_DECL, NULL, ptr_type_node);
15579 DECL_ARTIFICIAL (to) = 1;
15580 DECL_IGNORED_P (to) = 1;
15581 DECL_EXTERNAL (to) = 1;
15582 TREE_READONLY (to) = 1;
15584 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
15585 name = targetm.strip_name_encoding (name);
/* "*__imp_" vs "*__imp__" depends on fastcall prefix / user label
   prefix; refptr stubs use "*.refptr." / "*refptr.".  */
15587 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
15588 ? "*__imp_" : "*__imp__";
15590 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
15591 namelen = strlen (name);
15592 prefixlen = strlen (prefix);
15593 imp_name = (char *) alloca (namelen + prefixlen + 1);
15594 memcpy (imp_name, prefix, prefixlen);
15595 memcpy (imp_name + prefixlen, name, namelen + 1);
15597 name = ggc_alloc_string (imp_name, namelen + prefixlen);
15598 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
15599 SET_SYMBOL_REF_DECL (rtl, to);
15600 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
15603 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
15604 #ifdef SUB_TARGET_RECORD_STUB
15605 SUB_TARGET_RECORD_STUB (name);
/* The decl's RTL is a GOT-alias-set const mem load through the stub.  */
15609 rtl = gen_const_mem (Pmode, rtl);
15610 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
15612 SET_DECL_RTL (to, rtl);
15613 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
15618 /* Expand SYMBOL into its corresponding far-addresse symbol.
15619 WANT_REG is true if we require the result be a register. */
/* NOTE(review): elided listing; braces and the return missing from
   view.  Routes SYMBOL through its refptr stub decl's RTL.  */
15622 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
15627 gcc_assert (SYMBOL_REF_DECL (symbol));
15628 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
15630 x = DECL_RTL (imp_decl);
15632 x = force_reg (Pmode, x);
15636 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
15637 true if we require the result be a register. */
/* NOTE(review): elided listing; mirrors legitimize_pe_coff_extern_decl but
   requests the "__imp_" (dllimport) flavour of the indirection slot.  */
15640 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
15645 gcc_assert (SYMBOL_REF_DECL (symbol));
/* true => use the "__imp_" import-table slot.  */
15646 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
15648 x = DECL_RTL (imp_decl);
15650 x = force_reg (Pmode, x);
15654 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
15655 is true if we require the result be a register.
15654b NOTE(review): elided listing.  Dispatcher: dllimport symbols (plain or
15654c symbol+offset inside a CONST) go through legitimize_dllimport_symbol;
15654d for medium/large code models, external symbols with decls go through
15654e legitimize_pe_coff_extern_decl.  Non-PE-COFF targets bail out early. */
15658 legitimize_pe_coff_symbol (rtx addr, bool inreg)
15660 if (!TARGET_PECOFF)
15663 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
/* Bare dllimport'd symbol.  */
15665 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
15666 return legitimize_dllimport_symbol (addr, inreg);
/* (const (plus (symbol_ref dllimport) offset)) -- legitimize the symbol
   and re-add the offset outside the indirection.  */
15667 if (GET_CODE (addr) == CONST
15668 && GET_CODE (XEXP (addr, 0)) == PLUS
15669 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
15670 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
15672 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
15673 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
/* Remaining cases only apply to -mcmodel=medium/large PIC.  */
15677 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
15679 if (GET_CODE (addr) == SYMBOL_REF
15680 && !is_imported_p (addr)
15681 && SYMBOL_REF_EXTERNAL_P (addr)
15682 && SYMBOL_REF_DECL (addr))
15683 return legitimize_pe_coff_extern_decl (addr, inreg);
/* Same external-decl handling for symbol+offset wrapped in CONST.  */
15685 if (GET_CODE (addr) == CONST
15686 && GET_CODE (XEXP (addr, 0)) == PLUS
15687 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
15688 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
15689 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
15690 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
15692 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
15693 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
15698 /* Try machine-dependent ways of modifying an illegitimate address
15699 to be legitimate. If we find one, return the new, valid address.
15700 This macro is used in only one place: `memory_address' in explow.c.
15702 OLDX is the address as it was before break_out_memory_refs was called.
15703 In some cases it is useful to look at this to decide what needs to be done.
15705 It is always safe for this macro to do nothing. It exists to recognize
15706 opportunities to optimize the output.
15708 For the 80386, we handle X+REG by loading X into a register R and
15709 using R+REG. R will go in a general reg and indexing will be used.
15710 However, if REG is a broken-out memory address or multiplication,
15711 nothing needs to be done because REG can certainly go in a general reg.
15713 When -fpic is used, special handling is needed for symbolic references.
15714 See comments by legitimize_pic_address in i386.c for details. */
/* NOTE(review): elided listing -- several guards, assignments to CHANGED,
   and the final return are not visible below.  */
15717 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
15719 bool changed = false;
/* TLS symbols get dedicated sequences, bare or inside symbol+offset.  */
15722 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
15724 return legitimize_tls_address (x, (enum tls_model) log, false);
15725 if (GET_CODE (x) == CONST
15726 && GET_CODE (XEXP (x, 0)) == PLUS
15727 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
15728 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
15730 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
15731 (enum tls_model) log, false);
15732 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* PE-COFF dllimport/refptr indirection, then generic PIC handling.  */
15735 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
15737 rtx tmp = legitimize_pe_coff_symbol (x, true);
15742 if (flag_pic && SYMBOLIC_CONST (x))
15743 return legitimize_pic_address (x, 0);
15746 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
15747 return machopic_indirect_data_reference (x, 0);
15750 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
15751 if (GET_CODE (x) == ASHIFT
15752 && CONST_INT_P (XEXP (x, 1))
15753 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
15756 log = INTVAL (XEXP (x, 1));
15757 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
15758 GEN_INT (1 << log));
15761 if (GET_CODE (x) == PLUS)
15763 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
15765 if (GET_CODE (XEXP (x, 0)) == ASHIFT
15766 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
15767 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
15770 log = INTVAL (XEXP (XEXP (x, 0), 1));
15771 XEXP (x, 0) = gen_rtx_MULT (Pmode,
15772 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
15773 GEN_INT (1 << log));
15776 if (GET_CODE (XEXP (x, 1)) == ASHIFT
15777 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
15778 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
15781 log = INTVAL (XEXP (XEXP (x, 1), 1));
15782 XEXP (x, 1) = gen_rtx_MULT (Pmode,
15783 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
15784 GEN_INT (1 << log));
15787 /* Put multiply first if it isn't already. */
15788 if (GET_CODE (XEXP (x, 1)) == MULT)
15790 std::swap (XEXP (x, 0), XEXP (x, 1));
15794 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
15795 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
15796 created by virtual register instantiation, register elimination, and
15797 similar optimizations. */
15798 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
15801 x = gen_rtx_PLUS (Pmode,
15802 gen_rtx_PLUS (Pmode, XEXP (x, 0),
15803 XEXP (XEXP (x, 1), 0)),
15804 XEXP (XEXP (x, 1), 1));
15808 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
15809 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
15810 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
15811 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15812 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
15813 && CONSTANT_P (XEXP (x, 1)))
15816 rtx other = NULL_RTX;
/* Fold whichever side holds the CONST_INT into the other constant.  */
15818 if (CONST_INT_P (XEXP (x, 1)))
15820 constant = XEXP (x, 1);
15821 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
15823 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
15825 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
15826 other = XEXP (x, 1);
15834 x = gen_rtx_PLUS (Pmode,
15835 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
15836 XEXP (XEXP (XEXP (x, 0), 1), 0)),
15837 plus_constant (Pmode, other,
15838 INTVAL (constant)));
/* Early exit once a canonicalization already yields a valid address.  */
15842 if (changed && ix86_legitimate_address_p (mode, x, false))
/* Otherwise force MULT sub-expressions into registers.  */
15845 if (GET_CODE (XEXP (x, 0)) == MULT)
15848 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
15851 if (GET_CODE (XEXP (x, 1)) == MULT)
15854 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
15858 && REG_P (XEXP (x, 1))
15859 && REG_P (XEXP (x, 0)))
15862 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
15865 x = legitimize_pic_address (x, 0);
15868 if (changed && ix86_legitimate_address_p (mode, x, false))
/* Last resort: force the non-register half into a fresh Pmode pseudo,
   zero-extending if it was narrower than Pmode.  */
15871 if (REG_P (XEXP (x, 0)))
15873 rtx temp = gen_reg_rtx (Pmode);
15874 rtx val = force_operand (XEXP (x, 1), temp);
15877 val = convert_to_mode (Pmode, val, 1);
15878 emit_move_insn (temp, val);
15881 XEXP (x, 1) = temp;
15885 else if (REG_P (XEXP (x, 1)))
15887 rtx temp = gen_reg_rtx (Pmode);
15888 rtx val = force_operand (XEXP (x, 0), temp);
15891 val = convert_to_mode (Pmode, val, 1);
15892 emit_move_insn (temp, val);
15895 XEXP (x, 0) = temp;
15903 /* Print an integer constant expression in assembler syntax. Addition
15904 and subtraction are the only arithmetic that may appear in these
15905 expressions. FILE is the stdio stream to write to, X is the rtx, and
15906 CODE is the operand print code from the output string.
15903b NOTE(review): elided listing -- the case labels of this switch (PC,
15903c SYMBOL_REF, LABEL_REF, CONST_INT, CONST, PLUS, MINUS, UNSPEC, ...) are
15903d largely missing; the bodies below are grouped by their visible cases. */
15909 output_pic_addr_const (FILE *file, rtx x, int code)
15913 switch (GET_CODE (x))
15916 gcc_assert (flag_pic);
15921 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
15922 output_addr_const (file, x);
15925 const char *name = XSTR (x, 0);
15927 /* Mark the decl as referenced so that cgraph will
15928 output the function. */
15929 if (SYMBOL_REF_DECL (x))
15930 mark_decl_referenced (SYMBOL_REF_DECL (x));
15933 if (MACHOPIC_INDIRECT
15934 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
15935 name = machopic_indirection_name (x, /*stub_p=*/true);
15937 assemble_name (file, name);
/* 'P' requests a PLT reference for non-local symbols on ELF.  */
15939 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
15940 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
15941 fputs ("@PLT", file);
15948 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
15949 assemble_name (asm_out_file, buf);
15953 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15957 /* This used to output parentheses around the expression,
15958 but that does not work on the 386 (either ATT or BSD assembler). */
15959 output_pic_addr_const (file, XEXP (x, 0), code);
15963 /* We can't handle floating point constants;
15964 TARGET_PRINT_OPERAND must handle them. */
15965 output_operand_lossage ("floating constant misused");
15969 /* Some assemblers need integer constants to appear first. */
15970 if (CONST_INT_P (XEXP (x, 0)))
15972 output_pic_addr_const (file, XEXP (x, 0), code);
15974 output_pic_addr_const (file, XEXP (x, 1), code);
/* MINUS: subtrahend must be a plain integer.  */
15978 gcc_assert (CONST_INT_P (XEXP (x, 1)));
15979 output_pic_addr_const (file, XEXP (x, 1), code);
15981 output_pic_addr_const (file, XEXP (x, 0), code);
/* Grouping brackets differ between Intel and AT&T dialects.  */
15987 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
15988 output_pic_addr_const (file, XEXP (x, 0), code);
15990 output_pic_addr_const (file, XEXP (x, 1), code);
15992 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: print the wrapped expression, then the relocation suffix
   selected by the unspec number.  */
15996 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
15998 bool f = i386_asm_output_addr_const_extra (file, x);
16003 gcc_assert (XVECLEN (x, 0) == 1);
16004 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
16005 switch (XINT (x, 1))
16008 fputs ("@GOT", file);
16010 case UNSPEC_GOTOFF:
16011 fputs ("@GOTOFF", file);
16013 case UNSPEC_PLTOFF:
16014 fputs ("@PLTOFF", file);
16017 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16018 "(%rip)" : "[rip]", file);
16020 case UNSPEC_GOTPCREL:
16021 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16022 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
16024 case UNSPEC_GOTTPOFF:
16025 /* FIXME: This might be @TPOFF in Sun ld too. */
16026 fputs ("@gottpoff", file);
16029 fputs ("@tpoff", file);
16031 case UNSPEC_NTPOFF:
16033 fputs ("@tpoff", file);
16035 fputs ("@ntpoff", file);
16037 case UNSPEC_DTPOFF:
16038 fputs ("@dtpoff", file);
16040 case UNSPEC_GOTNTPOFF:
16042 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16043 "@gottpoff(%rip)": "@gottpoff[rip]", file);
16045 fputs ("@gotntpoff", file);
16047 case UNSPEC_INDNTPOFF:
16048 fputs ("@indntpoff", file);
16051 case UNSPEC_MACHOPIC_OFFSET:
16053 machopic_output_function_base_name (file);
16057 output_operand_lossage ("invalid UNSPEC as operand");
16063 output_operand_lossage ("invalid expression as operand");
16067 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
16068 We need to emit DTP-relative relocations.
16067b NOTE(review): elided listing -- the switch on SIZE (4 vs. 8 bytes) is
16067c not visible; the ", 0" pads a 4-byte value out to 8 on 64-bit.  */
16070 static void ATTRIBUTE_UNUSED
16071 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
16073 fputs (ASM_LONG, file);
16074 output_addr_const (file, x);
/* @dtpoff = offset from the start of the thread-local data block.  */
16075 fputs ("@dtpoff", file);
16081 fputs (", 0", file);
16084 gcc_unreachable ();
16088 /* Return true if X is a representation of the PIC register. This copes
16089 with calls from ix86_find_base_term, where the register might have
16090 been replaced by a cselib value. */
16093 ix86_pic_register_p (rtx x)
/* cselib VALUE: compare against the PIC register via cselib equality.  */
16095 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
16096 return (pic_offset_table_rtx
16097 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx))
16098 else if (!REG_P (x))
16100 else if (pic_offset_table_rtx)
16102 if (REGNO (x) == REGNO (pic_offset_table_rtx))
/* A hard reg can also stand for the pseudo PIC reg after allocation;
   match via its ORIGINAL_REGNO.  */
16104 if (HARD_REGISTER_P (x)
16105 && !HARD_REGISTER_P (pic_offset_table_rtx)
16106 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
/* No pseudo PIC reg in use: fall back to the fixed hard register.  */
16111 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
16114 /* Helper function for ix86_delegitimize_address.
16115 Attempt to delegitimize TLS local-exec accesses.
16114b NOTE(review): elided listing -- the early "return orig_x" paths after
16114c each failed guard are not visible.  On success this rebuilds
16114d base + index*scale + SYMBOL(+offset) from the %fs/%gs-relative form. */
16118 ix86_delegitimize_tls_address (rtx orig_x)
16120 rtx x = orig_x, unspec;
16121 struct ix86_address addr;
16123 if (!TARGET_TLS_DIRECT_SEG_REFS)
16127 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
/* Must decompose as a seg-override address whose displacement is a
   CONST wrapping an NTPOFF unspec.  */
16129 if (ix86_decompose_address (x, &addr) == 0
16130 || addr.seg != DEFAULT_TLS_SEG_REG
16131 || addr.disp == NULL_RTX
16132 || GET_CODE (addr.disp) != CONST)
16134 unspec = XEXP (addr.disp, 0);
/* Step over an optional constant offset around the unspec.  */
16135 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
16136 unspec = XEXP (unspec, 0);
16137 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
16139 x = XVECEXP (unspec, 0, 0);
16140 gcc_assert (GET_CODE (x) == SYMBOL_REF);
16141 if (unspec != XEXP (addr.disp, 0))
16142 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
/* Re-attach index (scaled if needed) and base.  */
16145 rtx idx = addr.index;
16146 if (addr.scale != 1)
16147 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
16148 x = gen_rtx_PLUS (Pmode, idx, x);
16151 x = gen_rtx_PLUS (Pmode, addr.base, x);
16152 if (MEM_P (orig_x))
16153 x = replace_equiv_address_nv (orig_x, x);
16157 /* In the name of slightly smaller debug output, and to cater to
16158 general assembler lossage, recognize PIC+GOTOFF and turn it back
16159 into a direct symbol reference.
16161 On Darwin, this is necessary to avoid a crash, because Darwin
16162 has a different PIC label for each routine but the DWARF debugging
16163 information is not associated with any particular routine, so it's
16164 necessary to remove references to the PIC label from RTL stored by
16165 the DWARF output code.
16157b NOTE(review): elided listing -- return statements and several braces
16157c between the numbered lines are missing from this view.  */
16168 ix86_delegitimize_address (rtx x)
16170 rtx orig_x = delegitimize_mem_from_attrs (x);
16171 /* addend is NULL or some rtx if x is something+GOTOFF where
16172 something doesn't include the PIC register. */
16173 rtx addend = NULL_RTX;
16174 /* reg_addend is NULL or a multiple of some register. */
16175 rtx reg_addend = NULL_RTX;
16176 /* const_addend is NULL or a const_int. */
16177 rtx const_addend = NULL_RTX;
16178 /* This is the result, or NULL. */
16179 rtx result = NULL_RTX;
/* 64-bit: (const (plus (unspec PCREL) off)) -> symbol + offset.  */
16188 if (GET_CODE (x) == CONST
16189 && GET_CODE (XEXP (x, 0)) == PLUS
16190 && GET_MODE (XEXP (x, 0)) == Pmode
16191 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
16192 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
16193 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
16195 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
16196 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
16197 if (MEM_P (orig_x))
16198 x = replace_equiv_address_nv (orig_x, x);
/* 64-bit: GOTPCREL loads and PCREL references unwrap directly.  */
16202 if (GET_CODE (x) == CONST
16203 && GET_CODE (XEXP (x, 0)) == UNSPEC
16204 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
16205 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
16206 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
16208 x = XVECEXP (XEXP (x, 0), 0, 0);
16209 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
16211 x = simplify_gen_subreg (GET_MODE (orig_x), x,
16219 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
16220 return ix86_delegitimize_tls_address (orig_x);
16222 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
16223 and -mcmodel=medium -fpic. */
16226 if (GET_CODE (x) != PLUS
16227 || GET_CODE (XEXP (x, 1)) != CONST)
16228 return ix86_delegitimize_tls_address (orig_x);
16230 if (ix86_pic_register_p (XEXP (x, 0)))
16231 /* %ebx + GOT/GOTOFF */
16233 else if (GET_CODE (XEXP (x, 0)) == PLUS)
16235 /* %ebx + %reg * scale + GOT/GOTOFF */
16236 reg_addend = XEXP (x, 0);
16237 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
16238 reg_addend = XEXP (reg_addend, 1);
16239 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
16240 reg_addend = XEXP (reg_addend, 0);
/* Neither side is the PIC reg: treat the whole LHS as an addend.  */
16243 reg_addend = NULL_RTX;
16244 addend = XEXP (x, 0);
16248 addend = XEXP (x, 0);
/* Peel the CONST wrapper and an optional trailing constant.  */
16250 x = XEXP (XEXP (x, 1), 0);
16251 if (GET_CODE (x) == PLUS
16252 && CONST_INT_P (XEXP (x, 1)))
16254 const_addend = XEXP (x, 1);
/* Accept only the GOT/GOTOFF/PLTOFF unspecs that are valid in this
   context (GOT only through a MEM, GOTOFF only outside one).  */
16258 if (GET_CODE (x) == UNSPEC
16259 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
16260 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
16261 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
16262 && !MEM_P (orig_x) && !addend)))
16263 result = XVECEXP (x, 0, 0);
16265 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
16266 && !MEM_P (orig_x))
16267 result = XVECEXP (x, 0, 0);
16270 return ix86_delegitimize_tls_address (orig_x);
/* Reassemble: symbol + const_addend, then + reg_addend.  */
16273 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
16275 result = gen_rtx_PLUS (Pmode, reg_addend, result);
16278 /* If the rest of original X doesn't involve the PIC register, add
16279 addend and subtract pic_offset_table_rtx. This can happen e.g.
16281 leal (%ebx, %ecx, 4), %ecx
16283 movl foo@GOTOFF(%ecx), %edx
16284 in which case we return (%ecx - %ebx) + foo
16285 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
16286 and reload has completed. */
16287 if (pic_offset_table_rtx
16288 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
16289 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
16290 pic_offset_table_rtx),
16292 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
16294 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
16295 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
16296 result = gen_rtx_PLUS (Pmode, tmp, result);
/* Narrow back to the original MEM's mode if necessary.  */
16301 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
16303 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
16304 if (result == NULL_RTX)
16310 /* If X is a machine specific address (i.e. a symbol or label being
16311 referenced as a displacement from the GOT implemented using an
16312 UNSPEC), then return the base term. Otherwise return X.
16310b NOTE(review): elided listing -- the TARGET_64BIT guard and intermediate
16310c returns are not visible; non-CONST / non-GOTPCREL cases fall through to
16310d full delegitimization.  */
16315 ix86_find_base_term (rtx x)
16321 if (GET_CODE (x) != CONST)
16323 term = XEXP (x, 0);
/* Skip an optional constant offset around the unspec.  */
16324 if (GET_CODE (term) == PLUS
16325 && CONST_INT_P (XEXP (term, 1)))
16326 term = XEXP (term, 0);
16327 if (GET_CODE (term) != UNSPEC
16328 || (XINT (term, 1) != UNSPEC_GOTPCREL
16329 && XINT (term, 1) != UNSPEC_PCREL))
/* The wrapped symbol/label is the base term.  */
16332 return XVECEXP (term, 0, 0);
16335 return ix86_delegitimize_address (x);
/* Emit the condition-code mnemonic suffix for CODE (setcc/cmov/jcc) to
   FILE.  MODE is the flags mode the comparison was done in; REVERSE flips
   the condition; FP selects the fcmov-style spellings.
   NOTE(review): elided listing -- the switch on CODE and most case labels
   (EQ/NE/GT/LT/GTU/LTU/GEU/LEU/UNORDERED/ORDERED, ...) are missing from
   this view; only selected mode checks and suffix assignments remain.  */
16339 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
16340 bool fp, FILE *file)
16342 const char *suffix;
/* FP flag modes: translate the FP comparison to its integer analogue.  */
16344 if (mode == CCFPmode || mode == CCFPUmode)
16346 code = ix86_fp_compare_code_to_integer (code);
16350 code = reverse_condition (code);
16401 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
16405 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
16406 Those same assemblers have the same but opposite lossage on cmov. */
16407 if (mode == CCmode)
16408 suffix = fp ? "nbe" : "a";
16410 gcc_unreachable ();
16426 gcc_unreachable ();
16430 if (mode == CCmode)
16432 else if (mode == CCCmode)
16433 suffix = fp ? "b" : "c";
16435 gcc_unreachable ();
16451 gcc_unreachable ();
16455 if (mode == CCmode)
16457 else if (mode == CCCmode)
16458 suffix = fp ? "nb" : "nc";
16460 gcc_unreachable ();
16463 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
16467 if (mode == CCmode)
16470 gcc_unreachable ();
/* Parity-flag conditions (unordered / ordered FP results).  */
16473 suffix = fp ? "u" : "p";
16476 suffix = fp ? "nu" : "np";
16479 gcc_unreachable ();
16481 fputs (suffix, file);
16484 /* Print the name of register X to FILE based on its machine mode and number.
16485 If CODE is 'w', pretend the mode is HImode.
16486 If CODE is 'b', pretend the mode is QImode.
16487 If CODE is 'k', pretend the mode is SImode.
16488 If CODE is 'q', pretend the mode is DImode.
16489 If CODE is 'x', pretend the mode is V4SFmode.
16490 If CODE is 't', pretend the mode is V8SFmode.
16491 If CODE is 'g', pretend the mode is V16SFmode.
16492 If CODE is 'h', pretend the reg is the 'high' byte register.
16493 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
16494 If CODE is 'd', duplicate the operand for AVX instruction.
16484b NOTE(review): elided listing -- the msize assignments for each override
16484c code and the switch over msize are missing from this view.  */
16498 print_reg (rtx x, int code, FILE *file)
16502 unsigned int regno;
/* AT&T syntax prefixes register names with '%'.  */
16505 if (ASSEMBLER_DIALECT == ASM_ATT)
16510 gcc_assert (TARGET_64BIT);
16511 fputs ("rip", file);
16515 if (code == 'y' && STACK_TOP_P (x))
16517 fputs ("st(0)", file);
/* Size-override codes replace the operand's natural mode size.  */
16523 else if (code == 'b')
16525 else if (code == 'k')
16527 else if (code == 'q')
16529 else if (code == 'h')
16531 else if (code == 'x')
16533 else if (code == 't')
16535 else if (code == 'g')
16538 msize = GET_MODE_SIZE (GET_MODE (x));
16540 regno = true_regnum (x);
/* These registers never appear in asm output.  */
16542 gcc_assert (regno != ARG_POINTER_REGNUM
16543 && regno != FRAME_POINTER_REGNUM
16544 && regno != FLAGS_REG
16545 && regno != FPSR_REG
16546 && regno != FPCR_REG);
16548 duplicated = code == 'd' && TARGET_AVX;
/* Legacy integer regs: 'r' (64-bit) or 'e' (32-bit) prefix on the name.  */
16554 if (LEGACY_INT_REGNO_P (regno))
16555 putc (msize == 8 && TARGET_64BIT ? 'r' : 'e', file);
16560 reg = hi_reg_name[regno];
16563 if (regno >= ARRAY_SIZE (qi_reg_name))
16565 reg = qi_reg_name[regno];
16568 if (regno >= ARRAY_SIZE (qi_high_reg_name))
16570 reg = qi_high_reg_name[regno];
/* SSE regs: rewrite the leading 'x' of "xmmN" to 'y'/'z' for 256/512-bit
   modes by printing the new letter and skipping the first character.  */
16574 if (SSE_REGNO_P (regno))
16576 gcc_assert (!duplicated);
16577 putc (msize == 32 ? 'y' : 'z', file);
16578 reg = hi_reg_name[regno] + 1;
16583 gcc_unreachable ();
16588 /* Irritatingly, AMD extended registers use
16589 different naming convention: "r%d[bwd]" */
16590 if (REX_INT_REGNO_P (regno))
16592 gcc_assert (TARGET_64BIT);
16596 error ("extended registers have no high halves");
16611 error ("unsupported operand size for extended register");
/* 'd' (AVX duplicate): print the operand a second time.  */
16619 if (ASSEMBLER_DIALECT == ASM_ATT)
16620 fprintf (file, ", %%%s", reg);
16622 fprintf (file, ", %s", reg);
16626 /* Meaning of CODE:
16627 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
16628 C -- print opcode suffix for set/cmov insn.
16629 c -- like C, but print reversed condition
16630 F,f -- likewise, but for floating-point.
16631 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
16633 R -- print embedded rounding and sae.
16634 r -- print only sae.
16635 z -- print the opcode suffix for the size of the current operand.
16636 Z -- likewise, with special suffixes for x87 instructions.
16637 * -- print a star (in certain assembler syntax)
16638 A -- print an absolute memory reference.
16639 E -- print address with DImode register names if TARGET_64BIT.
16640 w -- print the operand as if it's a "word" (HImode) even if it isn't.
16641 s -- print a shift double count, followed by the assembler's argument
16643 b -- print the QImode name of the register for the indicated operand.
16644 %b0 would print %al if operands[0] is reg 0.
16645 w -- likewise, print the HImode name of the register.
16646 k -- likewise, print the SImode name of the register.
16647 q -- likewise, print the DImode name of the register.
16648 x -- likewise, print the V4SFmode name of the register.
16649 t -- likewise, print the V8SFmode name of the register.
16650 g -- likewise, print the V16SFmode name of the register.
16651 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
16652 y -- print "st(0)" instead of "st" as a register.
16653 d -- print duplicated register operand for AVX instruction.
16654 D -- print condition for SSE cmp instruction.
16655 P -- if PIC, print an @PLT suffix.
16656 p -- print raw symbol name.
16657 X -- don't print any sort of PIC '@' suffix for a symbol.
16658 & -- print some in-use local-dynamic symbol name.
16659 H -- print a memory address offset by 8; used for sse high-parts
16660 Y -- print condition for XOP pcom* instruction.
16661 + -- print a branch hint as 'cs' or 'ds' prefix
16662 ; -- print a semicolon (after prefixes due to bug in older gas).
16663 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
16664 @ -- print a segment register of thread base pointer load
16665 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
16666 ! -- print MPX prefix for jxx/call/ret instructions if required.
16626b NOTE(review): elided listing -- the outer "if (code)" switch and most
16626c of its case labels are missing from this view; comments below mark the
16626d handlers that remain visible.  */
16670 ix86_print_operand (FILE *file, rtx x, int code)
/* 'A' (absolute reference): dialect-dependent prefix handling.  */
16677 switch (ASSEMBLER_DIALECT)
16684 /* Intel syntax. For absolute addresses, registers should not
16685 be surrounded by braces. */
16689 ix86_print_operand (file, x, 0);
16696 gcc_unreachable ();
16699 ix86_print_operand (file, x, 0);
/* 'E' (LEA-style address): defer to output_address via an unspec.  */
16703 /* Wrap address in an UNSPEC to declare special handling. */
16705 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
16707 output_address (VOIDmode, x);
/* 'L'..'T': AT&T-only one-letter size suffixes.  */
16711 if (ASSEMBLER_DIALECT == ASM_ATT)
16716 if (ASSEMBLER_DIALECT == ASM_ATT)
16721 if (ASSEMBLER_DIALECT == ASM_ATT)
16726 if (ASSEMBLER_DIALECT == ASM_ATT)
16731 if (ASSEMBLER_DIALECT == ASM_ATT)
16736 if (ASSEMBLER_DIALECT == ASM_ATT)
/* 'O': Sun-assembler cmov suffix, keyed on operand size.  */
16741 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
16742 if (ASSEMBLER_DIALECT != ASM_ATT)
16745 switch (GET_MODE_SIZE (GET_MODE (x)))
16760 output_operand_lossage
16761 ("invalid operand size for operand code 'O'");
/* 'z': generic integer size suffix.  */
16770 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
16772 /* Opcodes don't get size suffixes if using Intel opcodes. */
16773 if (ASSEMBLER_DIALECT == ASM_INTEL)
16776 switch (GET_MODE_SIZE (GET_MODE (x)))
16795 output_operand_lossage
16796 ("invalid operand size for operand code 'z'");
16801 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16803 (0, "non-integer operand used with operand code 'z'");
/* 'Z': x87 fild/fist-style suffixes.  */
16807 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
16808 if (ASSEMBLER_DIALECT == ASM_INTEL)
16811 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
16813 switch (GET_MODE_SIZE (GET_MODE (x)))
16816 #ifdef HAVE_AS_IX86_FILDS
16826 #ifdef HAVE_AS_IX86_FILDQ
16829 fputs ("ll", file);
16837 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16839 /* 387 opcodes don't get size suffixes
16840 if the operands are registers. */
16841 if (STACK_REG_P (x))
16844 switch (GET_MODE_SIZE (GET_MODE (x)))
16865 output_operand_lossage
16866 ("invalid operand type used with operand code 'Z'");
16870 output_operand_lossage
16871 ("invalid operand size for operand code 'Z'");
/* 's': shift-double count.  */
16890 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
16892 ix86_print_operand (file, x, 0);
16893 fputs (", ", file);
/* 'Y': XOP pcom* condition name.  */
16898 switch (GET_CODE (x))
16901 fputs ("neq", file);
16904 fputs ("eq", file);
16908 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
16912 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
16916 fputs ("le", file);
16920 fputs ("lt", file);
16923 fputs ("unord", file);
16926 fputs ("ord", file);
16929 fputs ("ueq", file);
16932 fputs ("nlt", file);
16935 fputs ("nle", file);
16938 fputs ("ule", file);
16941 fputs ("ult", file);
16944 fputs ("une", file);
16947 output_operand_lossage ("operand is not a condition code, "
16948 "invalid operand code 'Y'");
/* 'D': SSE cmp condition name.  */
16954 /* Little bit of braindamage here. The SSE compare instructions
16955 use completely different names for the comparisons than the
16956 fp conditional moves. */
16957 switch (GET_CODE (x))
16962 fputs ("eq_us", file);
16966 fputs ("eq", file);
16971 fputs ("nge", file);
16975 fputs ("lt", file);
16980 fputs ("ngt", file);
16984 fputs ("le", file);
16987 fputs ("unord", file);
16992 fputs ("neq_oq", file);
16996 fputs ("neq", file);
17001 fputs ("ge", file);
17005 fputs ("nlt", file);
17010 fputs ("gt", file);
17014 fputs ("nle", file);
17017 fputs ("ord", file);
17020 output_operand_lossage ("operand is not a condition code, "
17021 "invalid operand code 'D'");
/* 'C'/'c'/'F'/'f': set/cmov condition suffixes via put_condition_code.  */
17028 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
17029 if (ASSEMBLER_DIALECT == ASM_ATT)
17035 if (!COMPARISON_P (x))
17037 output_operand_lossage ("operand is not a condition code, "
17038 "invalid operand code '%c'", code);
17041 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
17042 code == 'c' || code == 'f',
17043 code == 'F' || code == 'f',
/* 'H': same memory operand, offset by 8 (SSE high part).  */
17048 if (!offsettable_memref_p (x))
17050 output_operand_lossage ("operand is not an offsettable memory "
17051 "reference, invalid operand code 'H'");
17054 /* It doesn't actually matter what mode we use here, as we're
17055 only going to use this for printing. */
17056 x = adjust_address_nv (x, DImode, 8);
17057 /* Output 'qword ptr' for intel assembler dialect. */
17058 if (ASSEMBLER_DIALECT == ASM_INTEL)
/* HLE xacquire/xrelease prefixes, raw bytes if gas lacks support.  */
17063 gcc_assert (CONST_INT_P (x));
17065 if (INTVAL (x) & IX86_HLE_ACQUIRE)
17066 #ifdef HAVE_AS_IX86_HLE
17067 fputs ("xacquire ", file);
17069 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
17071 else if (INTVAL (x) & IX86_HLE_RELEASE)
17072 #ifdef HAVE_AS_IX86_HLE
17073 fputs ("xrelease ", file);
17075 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
17077 /* We do not want to print value of the operand. */
/* AVX-512 zero-masking marker.  */
17081 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
17082 fputs ("{z}", file);
/* 'r': suppress-all-exceptions marker only.  */
17086 gcc_assert (CONST_INT_P (x));
17087 gcc_assert (INTVAL (x) == ROUND_SAE);
17089 if (ASSEMBLER_DIALECT == ASM_INTEL)
17090 fputs (", ", file);
17092 fputs ("{sae}", file);
17094 if (ASSEMBLER_DIALECT == ASM_ATT)
17095 fputs (", ", file);
/* 'R': embedded rounding mode + SAE.  */
17100 gcc_assert (CONST_INT_P (x));
17102 if (ASSEMBLER_DIALECT == ASM_INTEL)
17103 fputs (", ", file);
17105 switch (INTVAL (x))
17107 case ROUND_NEAREST_INT | ROUND_SAE:
17108 fputs ("{rn-sae}", file);
17110 case ROUND_NEG_INF | ROUND_SAE:
17111 fputs ("{rd-sae}", file);
17113 case ROUND_POS_INF | ROUND_SAE:
17114 fputs ("{ru-sae}", file);
17116 case ROUND_ZERO | ROUND_SAE:
17117 fputs ("{rz-sae}", file);
17120 gcc_unreachable ();
17123 if (ASSEMBLER_DIALECT == ASM_ATT)
17124 fputs (", ", file);
17129 if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&': some local-dynamic TLS symbol referenced by this function.  */
17135 const char *name = get_some_local_dynamic_name ();
17137 output_operand_lossage ("'%%&' used without any "
17138 "local dynamic TLS references");
17140 assemble_name (file, name);
/* '+': branch prediction hint prefixes.  */
17149 || optimize_function_for_size_p (cfun)
17150 || !TARGET_BRANCH_PREDICTION_HINTS)
17153 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
17156 int pred_val = XINT (x, 0);
/* Only hint clearly-biased branches (outside the 45%-55% band).  */
17158 if (pred_val < REG_BR_PROB_BASE * 45 / 100
17159 || pred_val > REG_BR_PROB_BASE * 55 / 100)
17161 bool taken = pred_val > REG_BR_PROB_BASE / 2;
17163 = final_forward_branch_p (current_output_insn) == 0;
17165 /* Emit hints only in the case default branch prediction
17166 heuristics would fail. */
17167 if (taken != cputaken)
17169 /* We use 3e (DS) prefix for taken branches and
17170 2e (CS) prefix for not taken branches. */
17172 fputs ("ds ; ", file);
17174 fputs ("cs ; ", file);
17182 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
17188 if (ASSEMBLER_DIALECT == ASM_ATT)
17191 /* The kernel uses a different segment register for performance
17192 reasons; a system call would not have to trash the userspace
17193 segment register, which would be expensive. */
17194 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
17195 fputs ("fs", file);
17197 fputs ("gs", file);
17201 putc (TARGET_AVX2 ? 'i' : 'f', file);
17205 if (TARGET_64BIT && Pmode != word_mode)
17206 fputs ("addr32 ", file);
17210 if (ix86_bnd_prefixed_insn_p (current_output_insn))
17211 fputs ("bnd ", file);
17215 output_operand_lossage ("invalid operand code '%c'", code);
/* No (or handled) code: dispatch on the operand's rtx class.  */
17220 print_reg (x, code, file);
17222 else if (MEM_P (x))
17224 rtx addr = XEXP (x, 0);
17226 /* No `byte ptr' prefix for call instructions ... */
17227 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
17229 machine_mode mode = GET_MODE (x);
17232 /* Check for explicit size override codes. */
17235 else if (code == 'w')
17237 else if (code == 'k')
17239 else if (code == 'q')
17241 else if (code == 'x')
17243 else if (mode == BLKmode)
17244 /* ... or BLKmode operands, when not overridden. */
17247 switch (GET_MODE_SIZE (mode))
17249 case 1: size = "BYTE"; break;
17250 case 2: size = "WORD"; break;
17251 case 4: size = "DWORD"; break;
17252 case 8: size = "QWORD"; break;
17253 case 12: size = "TBYTE"; break;
17255 if (mode == XFmode)
17260 case 32: size = "YMMWORD"; break;
17261 case 64: size = "ZMMWORD"; break;
17263 gcc_unreachable ();
17267 fputs (size, file);
17268 fputs (" PTR ", file);
17272 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
17273 output_operand_lossage ("invalid constraints for operand");
17275 ix86_print_operand_address_as
17276 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
/* SFmode immediates are printed as their 32-bit bit patterns.  */
17279 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
17283 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17285 if (ASSEMBLER_DIALECT == ASM_ATT)
17287 /* Sign extend 32bit SFmode immediate to 8 bytes. */
17289 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
17290 (unsigned long long) (int) l);
17292 fprintf (file, "0x%08x", (unsigned int) l);
17295 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
17299 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17301 if (ASSEMBLER_DIALECT == ASM_ATT)
17303 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
17306 /* These float cases don't actually occur as immediate operands. */
17307 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
17311 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
17312 fputs (dstr, file);
17317 /* We have patterns that allow zero sets of memory, for instance.
17318 In 64-bit mode, we should probably support all 8-byte vectors,
17319 since we can in fact encode that into an immediate. */
17320 if (GET_CODE (x) == CONST_VECTOR)
17322 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
/* Immediates get a '$' (AT&T) or "OFFSET FLAT:" (Intel) marker unless
   'P'/'p' asked for the raw form.  */
17326 if (code != 'P' && code != 'p')
17328 if (CONST_INT_P (x))
17330 if (ASSEMBLER_DIALECT == ASM_ATT)
17333 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
17334 || GET_CODE (x) == LABEL_REF)
17336 if (ASSEMBLER_DIALECT == ASM_ATT)
17339 fputs ("OFFSET FLAT:", file);
17342 if (CONST_INT_P (x))
17343 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
17344 else if (flag_pic || MACHOPIC_INDIRECT)
17345 output_pic_addr_const (file, x, code);
17347 output_addr_const (file, x);
/* Implements TARGET_PRINT_OPERAND_PUNCT_VALID_P: return nonzero iff CODE
   is one of the punctuation characters this backend accepts in an operand
   template ('@', '*', '+', '&', ';', '~', '^', '!').
   NOTE(review): the embedded source-line numbers jump, so declaration
   lines appear elided from this listing; code kept byte-identical.  */
17352 ix86_print_operand_punct_valid_p (unsigned char code)
17354   return (code == '@' || code == '*' || code == '+' || code == '&'
17355 	  || code == ';' || code == '~' || code == '^' || code == '!');
17358 /* Print a memory operand whose address is ADDR.  */
/* AS is the address space of the memory reference (used to emit %fs:/%gs:
   segment overrides); NO_RIP suppresses the RIP-relative short form used
   for 64-bit code.  The address is first decomposed into base, index,
   scale and displacement via ix86_decompose_address, with special-case
   unwrapping for VSIB (gather/scatter), LEA, and MPX BNDMK/BNDLDX UNSPEC
   wrappers.  NOTE(review): this listing is elided (line numbers jump);
   code kept byte-identical — verify against the full source.  */
17361 ix86_print_operand_address_as (FILE *file, rtx addr,
17362 			       addr_space_t as, bool no_rip)
17364   struct ix86_address parts;
17365   rtx base, index, disp;
/* VSIB addresses carry the vector index and scale in an UNSPEC; pull
   them out before decomposing the scalar part.  */
17371   if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
17373       ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
17374       gcc_assert (parts.index == NULL_RTX);
17375       parts.index = XVECEXP (addr, 0, 1);
17376       parts.scale = INTVAL (XVECEXP (addr, 0, 2));
17377       addr = XVECEXP (addr, 0, 0);
17380   else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
17382       gcc_assert (TARGET_64BIT);
17383       ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
/* MPX bound-make address: at most one of base/index may already be set.  */
17386   else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
17388       ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
17389       gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
17390       if (parts.base != NULL_RTX)
17392 	  parts.index = parts.base;
17395       parts.base = XVECEXP (addr, 0, 0);
17396       addr = XVECEXP (addr, 0, 0);
17398   else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
17400       ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
17401       gcc_assert (parts.index == NULL_RTX);
17402       parts.index = XVECEXP (addr, 0, 1);
17403       addr = XVECEXP (addr, 0, 0);
/* Plain address: no UNSPEC wrapper.  */
17406     ok = ix86_decompose_address (addr, &parts);
17411   index = parts.index;
17413   scale = parts.scale;
/* A generic address space may still carry a segment from decomposition;
   a non-generic one must not.  */
17415   if (ADDR_SPACE_GENERIC_P (as))
17418     gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
17420   if (!ADDR_SPACE_GENERIC_P (as))
17422       const char *string;
17424       if (as == ADDR_SPACE_SEG_FS)
17425 	string = (ASSEMBLER_DIALECT == ASM_ATT ? "%fs:" : "fs:");
17426       else if (as == ADDR_SPACE_SEG_GS)
17427 	string = (ASSEMBLER_DIALECT == ASM_ATT ? "%gs:" : "gs:");
17429 	gcc_unreachable ();
17430       fputs (string, file);
17433   /* Use one byte shorter RIP relative addressing for 64bit mode.  */
17434   if (TARGET_64BIT && !base && !index && !no_rip)
/* Strip a CONST (PLUS sym const_int) wrapper to find the symbol; only
   non-TLS symbols/labels are eligible for %rip addressing.  */
17438       if (GET_CODE (disp) == CONST
17439 	  && GET_CODE (XEXP (disp, 0)) == PLUS
17440 	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
17441 	symbol = XEXP (XEXP (disp, 0), 0);
17443       if (GET_CODE (symbol) == LABEL_REF
17444 	  || (GET_CODE (symbol) == SYMBOL_REF
17445 	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))
17449   if (!base && !index)
17451       /* Displacement only requires special attention.  */
17452       if (CONST_INT_P (disp))
17454 	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == ADDR_SPACE_GENERIC)
17455 	    fputs ("ds:", file);
17456 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
17459 	output_pic_addr_const (file, disp, 0);
17461 	output_addr_const (file, disp);
17465       /* Print SImode register names to force addr32 prefix.  */
17466       if (SImode_address_operand (addr, VOIDmode))
17470 	  gcc_assert (TARGET_64BIT);
17471 	  switch (GET_CODE (addr))
17474 	      gcc_assert (GET_MODE (addr) == SImode);
17475 	      gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
17479 	      gcc_assert (GET_MODE (addr) == DImode);
17482 	      gcc_unreachable ();
17485 	  gcc_assert (!code);
/* x32 only: large negative displacements must be zero-extended, see the
   long explanation below.  */
17491 	  && CONST_INT_P (disp)
17492 	  && INTVAL (disp) < -16*1024*1024)
17494 	  /* X32 runs in 64-bit mode, where displacement, DISP, in
17495 	     address DISP(%r64), is encoded as 32-bit immediate sign-
17496 	     extended from 32-bit to 64-bit.  For -0x40000300(%r64),
17497 	     address is %r64 + 0xffffffffbffffd00.  When %r64 <
17498 	     0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
17499 	     which is invalid for x32.  The correct address is %r64
17500 	     - 0x40000300 == 0xf7ffdd64.  To properly encode
17501 	     -0x40000300(%r64) for x32, we zero-extend negative
17502 	     displacement by forcing addr32 prefix which truncates
17503 	     0xfffffffff7ffdd64 to 0xf7ffdd64.  In theory, we should
17504 	     zero-extend all negative displacements, including -1(%rsp).
17505 	     However, for small negative displacements, sign-extension
17506 	     won't cause overflow.  We only zero-extend negative
17507 	     displacements if they < -16*1024*1024, which is also used
17508 	     to check legitimate address displacements for PIC.  */
/* AT&T dialect: disp(base,index,scale).  */
17512       if (ASSEMBLER_DIALECT == ASM_ATT)
17517 	    output_pic_addr_const (file, disp, 0);
17518 	  else if (GET_CODE (disp) == LABEL_REF)
17519 	    output_asm_label (disp);
17521 	    output_addr_const (file, disp);
17526 	    print_reg (base, code, file);
17530 	      print_reg (index, vsib ? 0 : code, file);
17531 	      if (scale != 1 || vsib)
17532 		fprintf (file, ",%d", scale);
/* Intel dialect: sym[base+index*scale+offset].  */
17538 	  rtx offset = NULL_RTX;
17542 	      /* Pull out the offset of a symbol; print any symbol itself.  */
17543 	      if (GET_CODE (disp) == CONST
17544 		  && GET_CODE (XEXP (disp, 0)) == PLUS
17545 		  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
17547 		  offset = XEXP (XEXP (disp, 0), 1);
17548 		  disp = gen_rtx_CONST (VOIDmode,
17549 					XEXP (XEXP (disp, 0), 0));
17553 		output_pic_addr_const (file, disp, 0);
17554 	      else if (GET_CODE (disp) == LABEL_REF)
17555 		output_asm_label (disp);
17556 	      else if (CONST_INT_P (disp))
17559 		output_addr_const (file, disp);
17565 	      print_reg (base, code, file);
17568 		  if (INTVAL (offset) >= 0)
17570 		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
17574 		fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
17581 	      print_reg (index, vsib ? 0 : code, file);
17582 	      if (scale != 1 || vsib)
17583 		fprintf (file, "*%d", scale);
/* Implements TARGET_PRINT_OPERAND_ADDRESS: thin wrapper that prints ADDR
   in the generic address space with RIP-relative addressing allowed.  */
17591 ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
17593   ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
17596 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */
/* Prints the TLS / Mach-O / stack-check UNSPEC address constants that
   generic output_addr_const cannot handle, appending the assembler
   relocation suffix (@gottpoff, @tpoff, @ntpoff, @dtpoff, ...).
   NOTE(review): listing is elided (line numbers jump) — return
   statements and some case labels are not visible here; code kept
   byte-identical.  */
17599 i386_asm_output_addr_const_extra (FILE *file, rtx x)
17603   if (GET_CODE (x) != UNSPEC)
17606   op = XVECEXP (x, 0, 0);
17607   switch (XINT (x, 1))
17609     case UNSPEC_GOTTPOFF:
17610       output_addr_const (file, op);
17611       /* FIXME: This might be @TPOFF in Sun ld.  */
17612       fputs ("@gottpoff", file);
17615       output_addr_const (file, op);
17616       fputs ("@tpoff", file);
17618     case UNSPEC_NTPOFF:
17619       output_addr_const (file, op);
/* 64-bit uses @tpoff, 32-bit uses @ntpoff for the same UNSPEC
   (the selecting condition is elided from this listing).  */
17621 	fputs ("@tpoff", file);
17623 	fputs ("@ntpoff", file);
17625     case UNSPEC_DTPOFF:
17626       output_addr_const (file, op);
17627       fputs ("@dtpoff", file);
17629     case UNSPEC_GOTNTPOFF:
17630       output_addr_const (file, op);
17632 	fputs (ASSEMBLER_DIALECT == ASM_ATT ?
17633 	       "@gottpoff(%rip)" : "@gottpoff[rip]", file);
17635 	fputs ("@gotntpoff", file);
17637     case UNSPEC_INDNTPOFF:
17638       output_addr_const (file, op);
17639       fputs ("@indntpoff", file);
17642     case UNSPEC_MACHOPIC_OFFSET:
17643       output_addr_const (file, op);
17645       machopic_output_function_base_name (file);
/* Split-stack support: emit the TCB-relative stack limit, %fs/%gs based.  */
17649     case UNSPEC_STACK_CHECK:
17653 	gcc_assert (flag_split_stack);
17655 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
17656 	offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
17658 	gcc_unreachable ();
17661 	fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
17672 /* Split one or more double-mode RTL references into pairs of half-mode
17673    references.  The RTL can be REG, offsettable MEM, integer constant, or
17674    CONST_DOUBLE.  "operands" is a pointer to an array of double-mode RTLs to
17675    split and "num" is its length.  lo_half and hi_half are output arrays
17676    that parallel "operands".  */
/* NOTE(review): listing elided — the switch selecting half_mode from MODE
   and the loop header are partly missing here; code kept byte-identical.  */
17679 split_double_mode (machine_mode mode, rtx operands[],
17680 		   int num, rtx lo_half[], rtx hi_half[])
17682   machine_mode half_mode;
17688       half_mode = DImode;
17691       half_mode = SImode;
17694       gcc_unreachable ();
17697   byte = GET_MODE_SIZE (half_mode);
17701       rtx op = operands[num];
17703       /* simplify_subreg refuse to split volatile memory addresses,
17704 	 but we still have to handle it.  */
17707 	  lo_half[num] = adjust_address (op, half_mode, 0);
17708 	  hi_half[num] = adjust_address (op, half_mode, byte);
/* Non-MEM case: use subregs; a VOIDmode constant takes its mode from MODE.  */
17712 	  lo_half[num] = simplify_gen_subreg (half_mode, op,
17713 					      GET_MODE (op) == VOIDmode
17714 					      ? mode : GET_MODE (op), 0);
17715 	  hi_half[num] = simplify_gen_subreg (half_mode, op,
17716 					      GET_MODE (op) == VOIDmode
17717 					      ? mode : GET_MODE (op), byte);
17722 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
17723    MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
17724    is the expression of the binary operation.  The output may either be
17725    emitted here, or returned to the caller, like all output_* functions.
17727    There is no guarantee that the operands are the same mode, as they
17728    might be within FLOAT or FLOAT_EXTEND expressions.  */
17730 #ifndef SYSV386_COMPAT
17731 /* Set to 1 for compatibility with brain-damaged assemblers.  No-one
17732    wants to fix the assemblers because that causes incompatibility
17733    with gcc.  No-one wants to fix gcc because that causes
17734    incompatibility with assemblers...  You can use the option of
17735    -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
17736 #define SYSV386_COMPAT 1
/* Returns the assembler template (possibly via the static BUF) for the
   operation; handles both the SSE scalar forms (addss/subsd/...) and the
   x87 stack forms with all their pop/reverse variants.
   NOTE(review): listing heavily elided (opcode-name assignments for each
   GET_CODE case are missing); code kept byte-identical.  */
17740 output_387_binary_op (rtx insn, rtx *operands)
17742   static char buf[40];
17745   int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
17747   /* Even if we do not want to check the inputs, this documents input
17748      constraints.  Which helps in understanding the following code.  */
17751   if (STACK_REG_P (operands[0])
17752       && ((REG_P (operands[1])
17753 	   && REGNO (operands[0]) == REGNO (operands[1])
17754 	   && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
17755 	  || (REG_P (operands[2])
17756 	      && REGNO (operands[0]) == REGNO (operands[2])
17757 	      && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
17758       && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
17761     gcc_assert (is_sse);
/* Pick the mnemonic root by operation; integer-mode operands select the
   fi* (integer-operand) x87 forms.  */
17764   switch (GET_CODE (operands[3]))
17767       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
17768 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
17776       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
17777 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
17785       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
17786 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
17794       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
17795 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
17803       gcc_unreachable ();
/* SSE path: three-operand (AVX) or two-operand forms, ss/sd by mode.  */
17810 	  strcpy (buf, ssep);
17811 	  if (GET_MODE (operands[0]) == SFmode)
17812 	    strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
17814 	    strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
17818 	  strcpy (buf, ssep + 1);
17819 	  if (GET_MODE (operands[0]) == SFmode)
17820 	    strcat (buf, "ss\t{%2, %0|%0, %2}");
17822 	    strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: choose among plain, reversed ('r') and popping ('p') forms
   depending on which operand is at stack top and which registers die.  */
17828   switch (GET_CODE (operands[3]))
17832       if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
17833 	std::swap (operands[1], operands[2]);
17835       /* know operands[0] == operands[1].  */
17837       if (MEM_P (operands[2]))
17843       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
17845 	  if (STACK_TOP_P (operands[0]))
17846 	    /* How is it that we are storing to a dead operand[2]?
17847 	       Well, presumably operands[1] is dead too.  We can't
17848 	       store the result to st(0) as st(0) gets popped on this
17849 	       instruction.  Instead store to operands[2] (which I
17850 	       think has to be st(1)).  st(1) will be popped later.
17851 	       gcc <= 2.8.1 didn't have this check and generated
17852 	       assembly code that the Unixware assembler rejected.  */
17853 	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
17855 	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
17859       if (STACK_TOP_P (operands[0]))
17860 	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
17862 	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
17867       if (MEM_P (operands[1]))
17873       if (MEM_P (operands[2]))
17879       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
17882 	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
17883 	     derived assemblers, confusingly reverse the direction of
17884 	     the operation for fsub{r} and fdiv{r} when the
17885 	     destination register is not st(0).  The Intel assembler
17886 	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
17887 	     figure out what the hardware really does.  */
17888 	  if (STACK_TOP_P (operands[0]))
17889 	    p = "{p\t%0, %2|rp\t%2, %0}";
17891 	    p = "{rp\t%2, %0|p\t%0, %2}";
17893 	  if (STACK_TOP_P (operands[0]))
17894 	    /* As above for fmul/fadd, we can't store to st(0).  */
17895 	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
17897 	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
17902       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
17905 	  if (STACK_TOP_P (operands[0]))
17906 	    p = "{rp\t%0, %1|p\t%1, %0}";
17908 	    p = "{p\t%1, %0|rp\t%0, %1}";
17910 	  if (STACK_TOP_P (operands[0]))
17911 	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
17913 	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
17918       if (STACK_TOP_P (operands[0]))
17920 	  if (STACK_TOP_P (operands[1]))
17921 	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
17923 	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
17926       else if (STACK_TOP_P (operands[1]))
17929 	    p = "{\t%1, %0|r\t%0, %1}";
17931 	    p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
17937 	    p = "{r\t%2, %0|\t%0, %2}";
17939 	    p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
17945       gcc_unreachable ();
17952 /* Check if a 256bit AVX register is referenced inside of EXP.  */
/* Looks through a SUBREG wrapper and tests the register's mode against
   VALID_AVX256_REG_OR_OI_MODE.  */
17955 ix86_check_avx256_register (const_rtx exp)
17957   if (SUBREG_P (exp))
17958     exp = SUBREG_REG (exp);
17960   return (REG_P (exp)
17961 	  && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
17964 /* Return needed mode for entity in optimize_mode_switching pass.  */
/* For the AVX upper-128-bit entity: a call whose argument registers carry
   no 256-bit values needs CLEAN state (vzeroupper before the call); any
   insn touching a 256-bit register needs DIRTY; otherwise ANY.
   NOTE(review): the CALL_P guard around the first loop is elided from
   this listing; code kept byte-identical.  */
17967 ix86_avx_u128_mode_needed (rtx_insn *insn)
17973       /* Needed mode is set to AVX_U128_CLEAN if there are
17974 	 no 256bit modes used in function arguments.  */
17975       for (link = CALL_INSN_FUNCTION_USAGE (insn);
17977 	   link = XEXP (link, 1))
17979 	  if (GET_CODE (XEXP (link, 0)) == USE)
17981 	      rtx arg = XEXP (XEXP (link, 0), 0);
17983 	      if (ix86_check_avx256_register (arg))
17984 		return AVX_U128_DIRTY;
17988       return AVX_U128_CLEAN;
17991   /* Require DIRTY mode if a 256bit AVX register is referenced.  Hardware
17992      changes state only when a 256bit register is written to, but we need
17993      to prevent the compiler from moving optimal insertion point above
17994      eventual read from 256bit register.  */
17995   subrtx_iterator::array_type array;
17996   FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
17997     if (ix86_check_avx256_register (*iter))
17998       return AVX_U128_DIRTY;
18000   return AVX_U128_ANY;
18003 /* Return mode that i387 must be switched into
18004    prior to the execution of insn.  */
/* Returns the i387 control-word mode (TRUNC/FLOOR/CEIL/MASK_PM/ANY/
   UNINITIALIZED) required before INSN, derived from the insn's i387_cw
   attribute.  NOTE(review): listing elided — the CALL_P test and the
   per-mode flag_* option checks between the `if (mode == ...)' lines are
   missing here; code kept byte-identical.  */
18007 ix86_i387_mode_needed (int entity, rtx_insn *insn)
18009   enum attr_i387_cw mode;
18011   /* The mode UNINITIALIZED is used to store control word after a
18012      function call or ASM pattern.  The mode ANY specify that function
18013      has no requirements on the control word and make no changes in the
18014      bits we are interested in.  */
18017       || (NONJUMP_INSN_P (insn)
18018 	  && (asm_noperands (PATTERN (insn)) >= 0
18019 	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
18020     return I387_CW_UNINITIALIZED;
18022   if (recog_memoized (insn) < 0)
18023     return I387_CW_ANY;
18025   mode = get_attr_i387_cw (insn);
18030   if (mode == I387_CW_TRUNC)
18035   if (mode == I387_CW_FLOOR)
18040   if (mode == I387_CW_CEIL)
18045   if (mode == I387_CW_MASK_PM)
18050     gcc_unreachable ();
18053   return I387_CW_ANY;
18056 /* Return mode that entity must be switched into
18057    prior to the execution of insn.  */
/* Implements TARGET_MODE_NEEDED: dispatches on ENTITY to the AVX_U128 or
   i387 control-word helper (the case labels are elided in this listing).  */
18060 ix86_mode_needed (int entity, rtx_insn *insn)
18065       return ix86_avx_u128_mode_needed (insn);
18070       return ix86_i387_mode_needed (entity, insn);
18072       gcc_unreachable ();
18077 /* Check if a 256bit AVX register is referenced in stores.  */
/* note_stores callback: sets *(bool *)DATA when DEST is a 256-bit AVX
   register (the flag assignment line is elided from this listing).  */
18080 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
18082   if (ix86_check_avx256_register (dest))
18084       bool *used = (bool *) data;
18089 /* Calculate mode of upper 128bit AVX registers after the insn.  */
/* vzeroupper/vzeroall leave the state CLEAN; after a call the state is
   CLEAN unless a 256-bit register appears in the return-value stores;
   otherwise the incoming MODE is kept.  NOTE(review): the CALL_P guard
   before the note_stores scan is elided; code kept byte-identical.  */
18092 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
18094   rtx pat = PATTERN (insn);
18096   if (vzeroupper_operation (pat, VOIDmode)
18097       || vzeroall_operation (pat, VOIDmode))
18098     return AVX_U128_CLEAN;
18100   /* We know that state is clean after CALL insn if there are no
18101      256bit registers used in the function return register.  */
18104       bool avx_reg256_found = false;
18105       note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
18107       return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
18110   /* Otherwise, return current mode.  Remember that if insn
18111      references AVX 256bit registers, the mode was already changed
18112      to DIRTY from MODE_NEEDED.  */
18116 /* Return the mode that an insn results in.  */
/* Implements TARGET_MODE_AFTER: dispatch on ENTITY (case labels elided
   in this listing); AVX_U128 defers to ix86_avx_u128_mode_after.  */
18119 ix86_mode_after (int entity, int mode, rtx_insn *insn)
18124       return ix86_avx_u128_mode_after (mode, insn);
18131       gcc_unreachable ();
/* Mode of the AVX upper-128 entity at function entry: DIRTY if any
   incoming argument arrives in a 256-bit AVX register, else CLEAN.  */
18136 ix86_avx_u128_mode_entry (void)
18140   /* Entry mode is set to AVX_U128_DIRTY if there are
18141      256bit modes used in function arguments.  */
18142   for (arg = DECL_ARGUMENTS (current_function_decl); arg;
18143        arg = TREE_CHAIN (arg))
18145       rtx incoming = DECL_INCOMING_RTL (arg);
18147       if (incoming && ix86_check_avx256_register (incoming))
18148 	return AVX_U128_DIRTY;
18151   return AVX_U128_CLEAN;
18154 /* Return a mode that ENTITY is assumed to be
18155    switched to at function entry.  */
/* Implements TARGET_MODE_ENTRY (case labels elided in this listing);
   i387 control-word entities start as I387_CW_ANY.  */
18158 ix86_mode_entry (int entity)
18163       return ix86_avx_u128_mode_entry ();
18168       return I387_CW_ANY;
18170       gcc_unreachable ();
/* Mode of the AVX upper-128 entity at function exit: DIRTY if the return
   value lives in a 256-bit AVX register, else CLEAN.  */
18175 ix86_avx_u128_mode_exit (void)
18177   rtx reg = crtl->return_rtx;
18179   /* Exit mode is set to AVX_U128_DIRTY if there are
18180      256bit modes used in the function return register.  */
18181   if (reg && ix86_check_avx256_register (reg))
18182     return AVX_U128_DIRTY;
18184   return AVX_U128_CLEAN;
18187 /* Return a mode that ENTITY is assumed to be
18188    switched to at function exit.  */
/* Implements TARGET_MODE_EXIT (case labels elided in this listing).  */
18191 ix86_mode_exit (int entity)
18196       return ix86_avx_u128_mode_exit ();
18201       return I387_CW_ANY;
18203       gcc_unreachable ();
/* Implements TARGET_MODE_PRIORITY; body (presumably `return n;') is
   elided from this listing.  */
18208 ix86_mode_priority (int, int n)
18213 /* Output code to initialize control word copies used by trunc?f?i and
18214    rounding patterns.  CURRENT_MODE is set to current control word,
18215    while NEW_MODE is set to new control word.  */
/* Stores the current x87 control word (fnstcw) into a stack slot, derives
   a modified copy with the rounding/precision bits for MODE, and stores
   that into the per-mode stack slot for later fldcw.  Two code paths:
   or/and on the whole HImode value (64-bit, partial-reg-stall, or -Os)
   versus insvsi_1 bitfield insertion.  NOTE(review): listing elided
   (switch headers and some closing braces missing); code byte-identical.  */
18218 emit_i387_cw_initialization (int mode)
18220   rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
18223   enum ix86_stack_slot slot;
18225   rtx reg = gen_reg_rtx (HImode);
18227   emit_insn (gen_x86_fnstcw_1 (stored_mode));
18228   emit_move_insn (reg, copy_rtx (stored_mode));
18230   if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
18231       || optimize_insn_for_size_p ())
18235 	case I387_CW_TRUNC:
18236 	  /* round toward zero (truncate) */
18237 	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
18238 	  slot = SLOT_CW_TRUNC;
18241 	case I387_CW_FLOOR:
18242 	  /* round down toward -oo */
18243 	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
18244 	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
18245 	  slot = SLOT_CW_FLOOR;
18249 	  /* round up toward +oo */
18250 	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
18251 	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
18252 	  slot = SLOT_CW_CEIL;
18255 	case I387_CW_MASK_PM:
18256 	  /* mask precision exception for nearbyint() */
18257 	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
18258 	  slot = SLOT_CW_MASK_PM;
18262 	  gcc_unreachable ();
/* Alternate path: set the RC field via bitfield insertion (insvsi_1).  */
18269 	case I387_CW_TRUNC:
18270 	  /* round toward zero (truncate) */
18271 	  emit_insn (gen_insvsi_1 (reg, GEN_INT (0xc)));
18272 	  slot = SLOT_CW_TRUNC;
18275 	case I387_CW_FLOOR:
18276 	  /* round down toward -oo */
18277 	  emit_insn (gen_insvsi_1 (reg, GEN_INT (0x4)));
18278 	  slot = SLOT_CW_FLOOR;
18282 	  /* round up toward +oo */
18283 	  emit_insn (gen_insvsi_1 (reg, GEN_INT (0x8)));
18284 	  slot = SLOT_CW_CEIL;
18287 	case I387_CW_MASK_PM:
18288 	  /* mask precision exception for nearbyint() */
18289 	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
18290 	  slot = SLOT_CW_MASK_PM;
18294 	  gcc_unreachable ();
18298   gcc_assert (slot < MAX_386_STACK_LOCALS);
18300   new_mode = assign_386_stack_local (HImode, slot);
18301   emit_move_insn (new_mode, reg);
18304 /* Emit vzeroupper.  */
/* REGS_LIVE is the set of hard registers live at the insertion point;
   insertion is cancelled (early return, elided here) if any call-saved
   SSE register is live, since vzeroupper would clobber its upper half.  */
18307 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
18311   /* Cancel automatic vzeroupper insertion if there are
18312      live call-saved SSE registers at the insertion point.  */
18314   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
18315     if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
18319   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
18320     if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
18323   emit_insn (gen_avx_vzeroupper ());
18326 /* Generate one or more insns to set ENTITY to MODE.  */
18328 /* Generate one or more insns to set ENTITY to MODE.  HARD_REG_LIVE
18329    is the set of hard registers live at the point where the insn(s)
18330    are to be inserted.  */
/* Implements TARGET_MODE_EMIT (entity case labels elided in this
   listing): emit vzeroupper for AVX_U128 CLEAN, or an fldcw control-word
   initialization for a concrete i387 rounding mode.  */
18333 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
18334 		    HARD_REG_SET regs_live)
18339       if (mode == AVX_U128_CLEAN)
18340 	ix86_avx_emit_vzeroupper (regs_live);
18346       if (mode != I387_CW_ANY
18347 	  && mode != I387_CW_UNINITIALIZED)
18348 	emit_i387_cw_initialization (mode);
18351       gcc_unreachable ();
18355 /* Output code for INSN to convert a float to a signed int.  OPERANDS
18356    are the insn operands.  The output may be [HSD]Imode and the input
18357    operand may be [SDX]Fmode.  */
/* FISTTP selects the SSE3 truncating-store form; otherwise a classic
   fldcw/fist(p)/fldcw sequence is emitted, with an extra fld to duplicate
   st(0) when the value must survive a popping store.  NOTE(review): the
   return statement(s) and some braces are elided; code byte-identical.  */
18360 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
18362   int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
18363   int dimode_p = GET_MODE (operands[0]) == DImode;
18364   int round_mode = get_attr_i387_cw (insn);
18366   /* Jump through a hoop or two for DImode, since the hardware has no
18367      non-popping instruction.  We used to do this a different way, but
18368      that was somewhat fragile and broke with post-reload splitters.  */
18369   if ((dimode_p || fisttp) && !stack_top_dies)
18370     output_asm_insn ("fld\t%y1", operands);
18372   gcc_assert (STACK_TOP_P (operands[1]));
18373   gcc_assert (MEM_P (operands[0]));
18374   gcc_assert (GET_MODE (operands[1]) != TFmode);
18377     output_asm_insn ("fisttp%Z0\t%0", operands);
18380       if (round_mode != I387_CW_ANY)
18381 	output_asm_insn ("fldcw\t%3", operands);
18382       if (stack_top_dies || dimode_p)
18383 	output_asm_insn ("fistp%Z0\t%0", operands);
18385 	output_asm_insn ("fist%Z0\t%0", operands);
18386       if (round_mode != I387_CW_ANY)
18387 	output_asm_insn ("fldcw\t%2", operands);
18393 /* Output code for x87 ffreep insn.  The OPNO argument, which may only
18394    have the values zero or one, indicates the ffreep insn's operand
18395    from the OPERANDS array.  */
18397 static const char *
18398 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
18400   if (TARGET_USE_FFREEP)
18401 #ifdef HAVE_AS_IX86_FFREEP
18402     return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler lacks ffreep: emit the raw opcode bytes (0xdf 0xc0+reg) via
   ASM_SHORT.  Note the bytes are emitted in the string as "0xc<reg>df",
   i.e. little-endian within the 16-bit word.  */
18405       static char retval[32];
18406       int regno = REGNO (operands[opno]);
18408       gcc_assert (STACK_REGNO_P (regno));
18410       regno -= FIRST_STACK_REG;
18412       snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
/* Fallback when ffreep is not wanted: plain popping store.  */
18417   return opno ? "fstp\t%y1" : "fstp\t%y0";
18421 /* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
18422    should be used.  UNORDERED_P is true when fucom should be used.  */
/* Returns the assembler template for an FP compare: SSE (u)comis[sd],
   or x87 ftst/fcom(p)(p)/fucom(p)(p)/fcomi(p)/fucomi(p) selected by a
   4-bit mask of (eflags_p, integer-operand, unordered_p, stack_top_dies).
   NOTE(review): listing elided (several guards, braces and the final
   `return alt[mask]' are missing); code kept byte-identical.  */
18425 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
18427   int stack_top_dies;
18428   rtx cmp_op0, cmp_op1;
18429   int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
18433       cmp_op0 = operands[0];
18434       cmp_op1 = operands[1];
18438       cmp_op0 = operands[1];
18439       cmp_op1 = operands[2];
18444       if (GET_MODE (operands[0]) == SFmode)
18446 	  return "%vucomiss\t{%1, %0|%0, %1}";
18448 	  return "%vcomiss\t{%1, %0|%0, %1}";
18451 	  return "%vucomisd\t{%1, %0|%0, %1}";
18453 	  return "%vcomisd\t{%1, %0|%0, %1}";
18456   gcc_assert (STACK_TOP_P (cmp_op0));
18458   stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero: ftst, popping st(0) afterwards if it dies.  */
18460   if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
18462       if (stack_top_dies)
18464 	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
18465 	  return output_387_ffreep (operands, 1);
18468 	return "ftst\n\tfnstsw\t%0";
18471   if (STACK_REG_P (cmp_op1)
18473       && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
18474       && REGNO (cmp_op1) != FIRST_STACK_REG)
18476       /* If both the top of the 387 stack dies, and the other operand
18477 	 is also a stack register that dies, then this must be a
18478 	 `fcompp' float compare */
18482 	  /* There is no double popping fcomi variant.  Fortunately,
18483 	     eflags is immune from the fstp's cc clobbering.  */
18485 	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
18487 	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
18488 	  return output_387_ffreep (operands, 0);
18493 	    return "fucompp\n\tfnstsw\t%0";
18495 	    return "fcompp\n\tfnstsw\t%0";
18500       /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
18502       static const char * const alt[16] =
18504 	"fcom%Z2\t%y2\n\tfnstsw\t%0",
18505 	"fcomp%Z2\t%y2\n\tfnstsw\t%0",
18506 	"fucom%Z2\t%y2\n\tfnstsw\t%0",
18507 	"fucomp%Z2\t%y2\n\tfnstsw\t%0",
18509 	"ficom%Z2\t%y2\n\tfnstsw\t%0",
18510 	"ficomp%Z2\t%y2\n\tfnstsw\t%0",
18514 	"fcomi\t{%y1, %0|%0, %y1}",
18515 	"fcomip\t{%y1, %0|%0, %y1}",
18516 	"fucomi\t{%y1, %0|%0, %y1}",
18517 	"fucomip\t{%y1, %0|%0, %y1}",
18528       mask  = eflags_p << 3;
18529       mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
18530       mask |= unordered_p << 1;
18531       mask |= stack_top_dies;
18533       gcc_assert (mask < 16);
/* Output one element of a dispatch-table address vector: an ASM_LONG (or
   ASM_QUAD on 64-bit, condition elided here) reference to local label
   VALUE.  */
18542 ix86_output_addr_vec_elt (FILE *file, int value)
18544   const char *directive = ASM_LONG;
18548     directive = ASM_QUAD;
18550     gcc_assert (!TARGET_64BIT);
18553   fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Output one element of a PIC-relative dispatch table: the difference
   between local labels VALUE and REL, or a @GOTOFF / Mach-O /
   GOT-relative form depending on target.  NOTE(review): listing elided
   (some condition lines missing); code kept byte-identical.  */
18557 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
18559   const char *directive = ASM_LONG;
18562   if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
18563     directive = ASM_QUAD;
18565     gcc_assert (!TARGET_64BIT);
18567   /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
18568   if (TARGET_64BIT || TARGET_VXWORKS_RTP)
18569     fprintf (file, "%s%s%d-%s%d\n",
18570 	     directive, LPREFIX, value, LPREFIX, rel);
18571   else if (HAVE_AS_GOTOFF_IN_DATA)
18572     fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
18574   else if (TARGET_MACHO)
18576       fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
18577       machopic_output_function_base_name (file);
/* Generic fallback: label relative to the GOT symbol.  */
18582     asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
18583 		 GOT_SYMBOL_NAME, LPREFIX, value);
18586 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* (continuation of the above comment is elided in this listing.)
   Emits a zeroing of DEST; when mov $0 is not preferred, the xor form is
   used, which needs a FLAGS_REG clobber in the pattern.  Valid only
   after reload because of the register-width games.  */
18590 ix86_expand_clear (rtx dest)
18594   /* We play register width games, which are only valid after reload.  */
18595   gcc_assert (reload_completed);
18597   /* Avoid HImode and its attendant prefix byte.  */
18598   if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
18599     dest = gen_rtx_REG (SImode, REGNO (dest));
18600   tmp = gen_rtx_SET (dest, const0_rtx);
18602   if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
18604       rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18605       tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
18611 /* X is an unchanging MEM.  If it is a constant pool reference, return
18612    the constant pool rtx, else NULL.  */
18615 maybe_get_pool_constant (rtx x)
/* Delegitimize first so PIC-wrapped pool addresses are recognized.  */
18617   x = ix86_delegitimize_address (XEXP (x, 0));
18619   if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
18620     return get_pool_constant (x);
/* Expander for scalar moves in MODE: legitimizes TLS symbols, PE/COFF
   dllimport references, and PIC addresses in the source; forces hard
   cases (mem-to-mem, large 64-bit immediates, FP constants) through
   registers or the constant pool before emitting the final SET.
   NOTE(review): listing heavily elided — operand extraction, several
   condition lines and closing braces are missing; code byte-identical.  */
18626 ix86_expand_move (machine_mode mode, rtx operands[])
18629   enum tls_model model;
/* TLS symbol as the whole source: legitimize directly.  */
18634   if (GET_CODE (op1) == SYMBOL_REF)
18638       model = SYMBOL_REF_TLS_MODEL (op1);
18641 	  op1 = legitimize_tls_address (op1, model, true);
18642 	  op1 = force_operand (op1, op0);
18645 	    op1 = convert_to_mode (mode, op1, 1);
18647       else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
/* TLS symbol plus constant addend: legitimize the symbol, re-add addend.  */
18650   else if (GET_CODE (op1) == CONST
18651 	   && GET_CODE (XEXP (op1, 0)) == PLUS
18652 	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
18654       rtx addend = XEXP (XEXP (op1, 0), 1);
18655       rtx symbol = XEXP (XEXP (op1, 0), 0);
18658       model = SYMBOL_REF_TLS_MODEL (symbol);
18660 	tmp = legitimize_tls_address (symbol, model, true);
18662 	tmp = legitimize_pe_coff_symbol (symbol, true);
18666 	  tmp = force_operand (tmp, NULL);
18667 	  tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
18668 				     op0, 1, OPTAB_DIRECT);
18671 	    op1 = convert_to_mode (mode, tmp, 1);
/* PIC / Mach-O indirect handling for symbolic sources.  */
18675   if ((flag_pic || MACHOPIC_INDIRECT)
18676       && symbolic_operand (op1, mode))
18678       if (TARGET_MACHO && !TARGET_64BIT)
18681 	  /* dynamic-no-pic */
18682 	  if (MACHOPIC_INDIRECT)
18684 	      rtx temp = (op0 && REG_P (op0) && mode == Pmode)
18685 			 ? op0 : gen_reg_rtx (Pmode);
18686 	      op1 = machopic_indirect_data_reference (op1, temp);
18688 		op1 = machopic_legitimize_pic_address (op1, mode,
18689 						       temp == op1 ? 0 : temp);
18691 	  if (op0 != op1 && GET_CODE (op0) != MEM)
18693 	      rtx insn = gen_rtx_SET (op0, op1);
18697 	  if (GET_CODE (op0) == MEM)
18698 	    op1 = force_reg (Pmode, op1);
18702 	      if (GET_CODE (temp) != REG)
18703 		temp = gen_reg_rtx (Pmode);
18704 	      temp = legitimize_pic_address (op1, temp);
18709 	  /* dynamic-no-pic */
18715 	    op1 = force_reg (mode, op1);
18716 	  else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
18718 	      rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
18719 	      op1 = legitimize_pic_address (op1, reg);
18722 		op1 = convert_to_mode (mode, op1, 1);
/* Avoid mem-to-mem moves (except simple pushes).  */
18729 	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
18730 	      || !push_operand (op0, mode))
18732 	op1 = force_reg (mode, op1);
18734       if (push_operand (op0, mode)
18735 	  && ! general_no_elim_operand (op1, mode))
18736 	op1 = copy_to_mode_reg (mode, op1);
18738       /* Force large constants in 64bit compilation into register
18739 	 to get them CSEed.  */
18740       if (can_create_pseudo_p ()
18741 	  && (mode == DImode) && TARGET_64BIT
18742 	  && immediate_operand (op1, mode)
18743 	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
18744 	  && !register_operand (op0, mode)
18746 	op1 = copy_to_mode_reg (mode, op1);
18748       if (can_create_pseudo_p ()
18749 	  && CONST_DOUBLE_P (op1))
18751 	  /* If we are loading a floating point constant to a register,
18752 	     force the value to memory now, since we'll get better code
18753 	     out the back end.  */
18755 	  op1 = validize_mem (force_const_mem (mode, op1));
18756 	  if (!register_operand (op0, mode))
18758 	      rtx temp = gen_reg_rtx (mode);
18759 	      emit_insn (gen_rtx_SET (temp, op1));
18760 	      emit_move_insn (op0, temp);
18766   emit_insn (gen_rtx_SET (op0, op1));
/* Expander for vector moves in MODE: resolves pushes, forces non-zero
   constants to the constant pool, and routes under-aligned SSE memory
   operands through the misaligned-move path.
   NOTE(review): listing elided (some condition/brace lines missing);
   code kept byte-identical.  */
18770 ix86_expand_vector_move (machine_mode mode, rtx operands[])
18772   rtx op0 = operands[0], op1 = operands[1];
18773   /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for IA MCU
18774      psABI since the biggest alignment is 4 byte for IA MCU psABI.  */
18775   unsigned int align = (TARGET_IAMCU
18776 			? GET_MODE_BITSIZE (mode)
18777 			: GET_MODE_ALIGNMENT (mode));
18779   if (push_operand (op0, VOIDmode))
18780     op0 = emit_move_resolve_push (mode, op0);
18782   /* Force constants other than zero into memory.  We do not know how
18783      the instructions used to build constants modify the upper 64 bits
18784      of the register, once we have that information we may be able
18785      to handle some of them more efficiently.  */
18786   if (can_create_pseudo_p ()
18787       && register_operand (op0, mode)
18788       && (CONSTANT_P (op1)
18790 	      && CONSTANT_P (SUBREG_REG (op1))))
18791       && !standard_sse_constant_p (op1))
18792     op1 = validize_mem (force_const_mem (mode, op1));
18794   /* We need to check memory alignment for SSE mode since attribute
18795      can make operands unaligned.  */
18796   if (can_create_pseudo_p ()
18797       && SSE_REG_MODE_P (mode)
18798       && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
18799 	  || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
18803       /* ix86_expand_vector_move_misalign() does not like constants ... */
18804       if (CONSTANT_P (op1)
18806 	      && CONSTANT_P (SUBREG_REG (op1))))
18807 	op1 = validize_mem (force_const_mem (mode, op1));
18809       /* ... nor both arguments in memory.  */
18810       if (!register_operand (op0, mode)
18811 	  && !register_operand (op1, mode))
18812 	op1 = force_reg (mode, op1);
18814       tmp[0] = op0; tmp[1] = op1;
18815       ix86_expand_vector_move_misalign (mode, tmp);
18819   /* Make operand1 a register if it isn't already.  */
18820   if (can_create_pseudo_p ()
18821       && !register_operand (op0, mode)
18822       && !register_operand (op1, mode))
18824       emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
18828   emit_insn (gen_rtx_SET (op0, op1));
18831 /* Split 32-byte AVX unaligned load and store if needed. */
/* Depending on tuning (TARGET_AVX256_SPLIT_UNALIGNED_{LOAD,STORE}) a
   misaligned 256-bit access is either split into two 128-bit halves
   or emitted as a single unaligned vmov.  NOTE(review): gaps in the
   embedded line numbers show missing lines (case labels, braces);
   verify against the upstream file before editing control flow.  */
18834 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
18837 rtx (*extract) (rtx, rtx, rtx);
18838 rtx (*load_unaligned) (rtx, rtx);
18839 rtx (*store_unaligned) (rtx, rtx);
/* Pick the mode-specific extract/load/store generators.  */
18842 switch (GET_MODE (op0))
18845 gcc_unreachable ();
18847 extract = gen_avx_vextractf128v32qi;
18848 load_unaligned = gen_avx_loaddquv32qi;
18849 store_unaligned = gen_avx_storedquv32qi;
18853 extract = gen_avx_vextractf128v8sf;
18854 load_unaligned = gen_avx_loadups256;
18855 store_unaligned = gen_avx_storeups256;
18859 extract = gen_avx_vextractf128v4df;
18860 load_unaligned = gen_avx_loadupd256;
18861 store_unaligned = gen_avx_storeupd256;
/* Load path: split into two 16-byte loads recombined with VEC_CONCAT.  */
18868 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
18869 && optimize_insn_for_speed_p ())
18871 rtx r = gen_reg_rtx (mode);
18872 m = adjust_address (op1, mode, 0);
18873 emit_move_insn (r, m);
18874 m = adjust_address (op1, mode, 16);
18875 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
18876 emit_move_insn (op0, r);
18878 /* Normal *mov<mode>_internal pattern will handle
18879 unaligned loads just fine if misaligned_operand
18880 is true, and without the UNSPEC it can be combined
18881 with arithmetic instructions. */
18882 else if (misaligned_operand (op1, GET_MODE (op1)))
18883 emit_insn (gen_rtx_SET (op0, op1));
18885 emit_insn (load_unaligned (op0, op1));
/* Store path: two vextractf128 halves, or one unaligned store.  */
18887 else if (MEM_P (op0))
18889 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
18890 && optimize_insn_for_speed_p ())
18892 m = adjust_address (op0, mode, 0);
18893 emit_insn (extract (m, op1, const0_rtx));
18894 m = adjust_address (op0, mode, 16);
18895 emit_insn (extract (m, op1, const1_rtx));
18898 emit_insn (store_unaligned (op0, op1));
/* Neither operand in memory: nothing to fix up here.  */
18901 gcc_unreachable ();
18904 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
18905 straight to ix86_expand_vector_move. */
18906 /* Code generation for scalar reg-reg moves of single and double precision data:
18907 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
18911 if (x86_sse_partial_reg_dependency == true)
18916 Code generation for scalar loads of double precision data:
18917 if (x86_sse_split_regs == true)
18918 movlpd mem, reg (gas syntax)
18922 Code generation for unaligned packed loads of single precision data
18923 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
18924 if (x86_sse_unaligned_move_optimal)
18927 if (x86_sse_partial_reg_dependency == true)
18939 Code generation for unaligned packed loads of double precision data
18940 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
18941 if (x86_sse_unaligned_move_optimal)
18944 if (x86_sse_split_regs == true)
/* Dispatch by vector width: 64-byte (AVX-512), 32-byte (AVX), then the
   16-byte SSE cases, each with separate load (MEM_P (op1)) and store
   (MEM_P (op0)) paths.  NOTE(review): the embedded upstream line numbers
   have large gaps throughout this function (e.g. 19005 -> 19009,
   19081 -> 19086), so many case labels, conditions and braces are
   missing from this excerpt; treat the visible flow as a sketch and
   consult the upstream file for the real structure.  */
18957 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
18959 rtx op0, op1, orig_op0 = NULL_RTX, m;
18960 rtx (*load_unaligned) (rtx, rtx);
18961 rtx (*store_unaligned) (rtx, rtx);
/* 64-byte vectors: integer modes are punned to V16SImode first.  */
18966 if (GET_MODE_SIZE (mode) == 64)
18968 switch (GET_MODE_CLASS (mode))
18970 case MODE_VECTOR_INT:
18972 if (GET_MODE (op0) != V16SImode)
18977 op0 = gen_reg_rtx (V16SImode);
18980 op0 = gen_lowpart (V16SImode, op0);
18982 op1 = gen_lowpart (V16SImode, op1);
18985 case MODE_VECTOR_FLOAT:
18986 switch (GET_MODE (op0))
18989 gcc_unreachable ();
18991 load_unaligned = gen_avx512f_loaddquv16si;
18992 store_unaligned = gen_avx512f_storedquv16si;
18995 load_unaligned = gen_avx512f_loadups512;
18996 store_unaligned = gen_avx512f_storeups512;
18999 load_unaligned = gen_avx512f_loadupd512;
19000 store_unaligned = gen_avx512f_storeupd512;
19005 emit_insn (load_unaligned (op0, op1));
19006 else if (MEM_P (op0))
19007 emit_insn (store_unaligned (op0, op1));
19009 gcc_unreachable ();
/* If op0 was punned, copy the result back in the caller's mode.  */
19011 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
19015 gcc_unreachable ();
/* 32-byte vectors: integer modes punned to V32QImode, float modes go
   through the AVX-256 split helper above.  */
19022 && GET_MODE_SIZE (mode) == 32)
19024 switch (GET_MODE_CLASS (mode))
19026 case MODE_VECTOR_INT:
19028 if (GET_MODE (op0) != V32QImode)
19033 op0 = gen_reg_rtx (V32QImode);
19036 op0 = gen_lowpart (V32QImode, op0);
19038 op1 = gen_lowpart (V32QImode, op1);
19041 case MODE_VECTOR_FLOAT:
19042 ix86_avx256_split_vector_move_misalign (op0, op1);
19044 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
19048 gcc_unreachable ();
19056 /* Normal *mov<mode>_internal pattern will handle
19057 unaligned loads just fine if misaligned_operand
19058 is true, and without the UNSPEC it can be combined
19059 with arithmetic instructions. */
19061 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
19062 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
19063 && misaligned_operand (op1, GET_MODE (op1)))
19064 emit_insn (gen_rtx_SET (op0, op1));
19065 /* ??? If we have typed data, then it would appear that using
19066 movdqu is the only way to get unaligned data loaded with
19068 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19070 if (GET_MODE (op0) != V16QImode)
19073 op0 = gen_reg_rtx (V16QImode);
19075 op1 = gen_lowpart (V16QImode, op1);
19076 /* We will eventually emit movups based on insn attributes. */
19077 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
19079 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
/* V2DF load: either a single movupd, or a low/high half pair to dodge
   partial-register stalls on split-register targets.  */
19081 else if (TARGET_SSE2 && mode == V2DFmode)
19086 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
19087 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19088 || optimize_insn_for_size_p ())
19090 /* We will eventually emit movups based on insn attributes. */
19091 emit_insn (gen_sse2_loadupd (op0, op1));
19095 /* When SSE registers are split into halves, we can avoid
19096 writing to the top half twice. */
19097 if (TARGET_SSE_SPLIT_REGS)
19099 emit_clobber (op0);
19104 /* ??? Not sure about the best option for the Intel chips.
19105 The following would seem to satisfy; the register is
19106 entirely cleared, breaking the dependency chain. We
19107 then store to the upper half, with a dependency depth
19108 of one. A rumor has it that Intel recommends two movsd
19109 followed by an unpacklpd, but this is unconfirmed. And
19110 given that the dependency depth of the unpacklpd would
19111 still be one, I'm not sure why this would be better. */
19112 zero = CONST0_RTX (V2DFmode);
19115 m = adjust_address (op1, DFmode, 0);
19116 emit_insn (gen_sse2_loadlpd (op0, zero, m));
19117 m = adjust_address (op1, DFmode, 8);
19118 emit_insn (gen_sse2_loadhpd (op0, op0, m));
/* V4SF (and remaining float) load: movups, or loadlps/loadhps pair.  */
19125 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
19126 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19127 || optimize_insn_for_size_p ())
19129 if (GET_MODE (op0) != V4SFmode)
19132 op0 = gen_reg_rtx (V4SFmode);
19134 op1 = gen_lowpart (V4SFmode, op1);
19135 emit_insn (gen_sse_loadups (op0, op1));
19137 emit_move_insn (orig_op0,
19138 gen_lowpart (GET_MODE (orig_op0), op0));
19142 if (mode != V4SFmode)
19143 t = gen_reg_rtx (V4SFmode);
/* Clearing first breaks the false dependency on the old register
   contents for partial-register-dependency targets.  */
19147 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
19148 emit_move_insn (t, CONST0_RTX (V4SFmode));
19152 m = adjust_address (op1, V2SFmode, 0);
19153 emit_insn (gen_sse_loadlps (t, t, m));
19154 m = adjust_address (op1, V2SFmode, 8);
19155 emit_insn (gen_sse_loadhps (t, t, m));
19156 if (mode != V4SFmode)
19157 emit_move_insn (op0, gen_lowpart (mode, t));
/* Store side: mirror of the load cases.  */
19160 else if (MEM_P (op0))
19162 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19164 op0 = gen_lowpart (V16QImode, op0);
19165 op1 = gen_lowpart (V16QImode, op1);
19166 /* We will eventually emit movups based on insn attributes. */
19167 emit_insn (gen_sse2_storedquv16qi (op0, op1));
19169 else if (TARGET_SSE2 && mode == V2DFmode)
19172 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
19173 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19174 || optimize_insn_for_size_p ())
19175 /* We will eventually emit movups based on insn attributes. */
19176 emit_insn (gen_sse2_storeupd (op0, op1));
19179 m = adjust_address (op0, DFmode, 0);
19180 emit_insn (gen_sse2_storelpd (m, op1));
19181 m = adjust_address (op0, DFmode, 8);
19182 emit_insn (gen_sse2_storehpd (m, op1));
19187 if (mode != V4SFmode)
19188 op1 = gen_lowpart (V4SFmode, op1);
19191 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
19192 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19193 || optimize_insn_for_size_p ())
19195 op0 = gen_lowpart (V4SFmode, op0);
19196 emit_insn (gen_sse_storeups (op0, op1));
19200 m = adjust_address (op0, V2SFmode, 0);
19201 emit_insn (gen_sse_storelps (m, op1));
19202 m = adjust_address (op0, V2SFmode, 8);
19203 emit_insn (gen_sse_storehps (m, op1));
19208 gcc_unreachable ();
19211 /* Helper function of ix86_fixup_binary_operands to canonicalize
19212 operand order. Returns true if the operands should be swapped. */
/* Priorities, highest first: src1 matching dst, then immediates second,
   then memory operands second.  NOTE(review): the excerpt drops lines
   (e.g. the returns between the checks and the final memory-priority
   logic after 19238); consult the upstream file for the full body.  */
19215 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
19218 rtx dst = operands[0];
19219 rtx src1 = operands[1];
19220 rtx src2 = operands[2];
19222 /* If the operation is not commutative, we can't do anything. */
19223 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
19226 /* Highest priority is that src1 should match dst. */
19227 if (rtx_equal_p (dst, src1))
19229 if (rtx_equal_p (dst, src2))
19232 /* Next highest priority is that immediate constants come second. */
19233 if (immediate_operand (src2, mode))
19235 if (immediate_operand (src1, mode))
19238 /* Lowest priority is that memory references should come second. */
19248 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
19249 destination to use for the operation. If different from the true
19250 destination in operands[0], a copy operation will be required. */
/* Writes the (possibly swapped / reloaded) sources back into
   operands[1] and operands[2]; the caller must copy the returned dst
   to operands[0] when they differ.  NOTE(review): embedded line-number
   gaps indicate missing lines in this excerpt.  */
19253 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
19256 rtx dst = operands[0];
19257 rtx src1 = operands[1];
19258 rtx src2 = operands[2];
19260 /* Canonicalize operand order. */
19261 if (ix86_swap_binary_operands_p (code, mode, operands))
19263 /* It is invalid to swap operands of different modes. */
19264 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
19266 std::swap (src1, src2);
19269 /* Both source operands cannot be in memory. */
19270 if (MEM_P (src1) && MEM_P (src2))
19272 /* Optimization: Only read from memory once. */
19273 if (rtx_equal_p (src1, src2))
19275 src2 = force_reg (mode, src2);
19278 else if (rtx_equal_p (dst, src1))
19279 src2 = force_reg (mode, src2);
19281 src1 = force_reg (mode, src1);
19284 /* If the destination is memory, and we do not have matching source
19285 operands, do things in registers. */
19286 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
19287 dst = gen_reg_rtx (mode);
19289 /* Source 1 cannot be a constant. */
19290 if (CONSTANT_P (src1))
19291 src1 = force_reg (mode, src1);
19293 /* Source 1 cannot be a non-matching memory. */
19294 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
19295 src1 = force_reg (mode, src1);
19297 /* Improve address combine. */
19299 && GET_MODE_CLASS (mode) == MODE_INT
19301 src2 = force_reg (mode, src2);
19303 operands[1] = src1;
19304 operands[2] = src2;
19308 /* Similarly, but assume that the destination has already been
19309 set up properly. */
/* Thin wrapper over ix86_fixup_binary_operands that asserts no extra
   destination copy is needed (dst must stay operands[0]).  */
19312 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
19313 machine_mode mode, rtx operands[])
19315 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
19316 gcc_assert (dst == operands[0]);
19319 /* Attempt to expand a binary operator. Make the expansion closer to the
19320 actual machine, then just general_operand, which will allow 3 separate
19321 memory references (one output, two input) in a single insn. */
/* Emits dst = src1 CODE src2 with a FLAGS_REG clobber attached, except
   on the PLUS/LEA path visible below where the clobber is skipped.
   NOTE(review): lines are missing between 19335/19337 and 19341/19346
   in this excerpt; check upstream before changing this flow.  */
19324 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
19327 rtx src1, src2, dst, op, clob;
19329 dst = ix86_fixup_binary_operands (code, mode, operands);
19330 src1 = operands[1];
19331 src2 = operands[2];
19333 /* Emit the instruction. */
19335 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2));
19337 if (reload_completed
19339 && !rtx_equal_p (dst, src1))
19341 /* This is going to be an LEA; avoid splitting it later. */
19346 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19347 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
19350 /* Fix up the destination if needed. */
19351 if (dst != operands[0])
19352 emit_move_insn (operands[0], dst);
19355 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
19356 the given OPERANDS. */
/* Special-cases integer-vector logic on operands that are really
   bit-cast float vectors, so a float logical insn (andps etc.) can be
   used instead of bouncing through integer registers.  NOTE(review):
   this excerpt is missing lines (e.g. between 19386/19390 and
   19402/19407); the fallback path starts at 19423.  */
19359 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
19362 rtx op1 = NULL_RTX, op2 = NULL_RTX;
19363 if (SUBREG_P (operands[1]))
19368 else if (SUBREG_P (operands[2]))
19373 /* Optimize (__m128i) d | (__m128i) e and similar code
19374 when d and e are float vectors into float vector logical
19375 insn. In C/C++ without using intrinsics there is no other way
19376 to express vector logical operation on float vectors than
19377 to cast them temporarily to integer vectors. */
19379 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19380 && (SUBREG_P (op2) || GET_CODE (op2) == CONST_VECTOR)
19381 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
19382 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
19383 && SUBREG_BYTE (op1) == 0
19384 && (GET_CODE (op2) == CONST_VECTOR
19385 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
19386 && SUBREG_BYTE (op2) == 0))
19387 && can_create_pseudo_p ())
19390 switch (GET_MODE (SUBREG_REG (op1)))
19398 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
19399 if (GET_CODE (op2) == CONST_VECTOR)
19401 op2 = gen_lowpart (GET_MODE (dst), op2);
19402 op2 = force_reg (GET_MODE (dst), op2);
19407 op2 = SUBREG_REG (operands[2]);
19408 if (!vector_operand (op2, GET_MODE (dst)))
19409 op2 = force_reg (GET_MODE (dst), op2);
19411 op1 = SUBREG_REG (op1);
19412 if (!vector_operand (op1, GET_MODE (dst)))
19413 op1 = force_reg (GET_MODE (dst), op1);
19414 emit_insn (gen_rtx_SET (dst,
19415 gen_rtx_fmt_ee (code, GET_MODE (dst),
19417 emit_move_insn (operands[0], gen_lowpart (mode, dst));
/* Generic path: legitimize both operands and emit the SET directly.  */
19423 if (!vector_operand (operands[1], mode))
19424 operands[1] = force_reg (mode, operands[1]);
19425 if (!vector_operand (operands[2], mode))
19426 operands[2] = force_reg (mode, operands[2]);
19427 ix86_fixup_binary_operands_no_copy (code, mode, operands);
19428 emit_insn (gen_rtx_SET (operands[0],
19429 gen_rtx_fmt_ee (code, mode, operands[1],
19433 /* Return TRUE or FALSE depending on whether the binary operator meets the
19434 appropriate constraints. */
/* Predicate counterpart of ix86_fixup_binary_operands: checks the same
   constraints without modifying OPERANDS.  NOTE(review): return values
   on the early-out paths are on lines dropped from this excerpt.  */
19437 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
19440 rtx dst = operands[0];
19441 rtx src1 = operands[1];
19442 rtx src2 = operands[2];
19444 /* Both source operands cannot be in memory. */
19445 if (MEM_P (src1) && MEM_P (src2))
19448 /* Canonicalize operand order for commutative operators. */
19449 if (ix86_swap_binary_operands_p (code, mode, operands))
19450 std::swap (src1, src2);
19452 /* If the destination is memory, we must have a matching source operand. */
19453 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
19456 /* Source 1 cannot be a constant. */
19457 if (CONSTANT_P (src1))
19460 /* Source 1 cannot be a non-matching memory. */
19461 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
19462 /* Support "andhi/andsi/anddi" as a zero-extending move. */
19463 return (code == AND
19466 || (TARGET_64BIT && mode == DImode))
19467 && satisfies_constraint_L (src2));
19472 /* Attempt to expand a unary operator. Make the expansion closer to the
19473 actual machine, then just general_operand, which will allow 2 separate
19474 memory references (one output, one input) in a single insn. */
/* Emits dst = CODE src, attaching a FLAGS_REG clobber (the condition
   guarding the clobber-free path is on lines missing between 19502 and
   19508 in this excerpt).  */
19477 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
19480 bool matching_memory = false;
19481 rtx src, dst, op, clob;
19486 /* If the destination is memory, and we do not have matching source
19487 operands, do things in registers. */
19490 if (rtx_equal_p (dst, src))
19491 matching_memory = true;
19493 dst = gen_reg_rtx (mode);
19496 /* When source operand is memory, destination must match. */
19497 if (MEM_P (src) && !matching_memory)
19498 src = force_reg (mode, src);
19500 /* Emit the instruction. */
19502 op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src));
19508 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19509 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
19512 /* Fix up the destination if needed. */
19513 if (dst != operands[0])
19514 emit_move_insn (operands[0], dst);
19517 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
19518 divisor are within the range [0-255]. */
/* Emits a runtime test (OR of dividend and divisor against ~0xff) and
   branches to either the full-width signed/unsigned divmod or a cheap
   8-bit udivmod whose AL/AH results are extracted afterwards.
   NOTE(review): line-number gaps show dropped lines (mode switch
   labels, braces); verify against upstream before editing.  */
19521 ix86_split_idivmod (machine_mode mode, rtx operands[],
19524 rtx_code_label *end_label, *qimode_label;
19525 rtx insn, div, mod;
19526 rtx scratch, tmp0, tmp1, tmp2;
19527 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
19528 rtx (*gen_zero_extend) (rtx, rtx);
19529 rtx (*gen_test_ccno_1) (rtx, rtx);
/* Select SImode vs DImode generator functions.  */
19534 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
19535 gen_test_ccno_1 = gen_testsi_ccno_1;
19536 gen_zero_extend = gen_zero_extendqisi2;
19539 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
19540 gen_test_ccno_1 = gen_testdi_ccno_1;
19541 gen_zero_extend = gen_zero_extendqidi2;
19544 gcc_unreachable ();
19547 end_label = gen_label_rtx ();
19548 qimode_label = gen_label_rtx ();
19550 scratch = gen_reg_rtx (mode);
19552 /* Use 8bit unsigned divmod if dividend and divisor are within
19553 the range [0-255]. */
19554 emit_move_insn (scratch, operands[2]);
19555 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
19556 scratch, 1, OPTAB_DIRECT)
/* test scratch, ~0xff: ZF set iff both inputs fit in 8 bits.  */
19557 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
19558 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
19559 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
19560 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
19561 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
19563 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0));
/* 50/50 branch prediction hint for the fast-path jump.  */
19564 predict_jump (REG_BR_PROB_BASE * 50 / 100);
19565 JUMP_LABEL (insn) = qimode_label;
19567 /* Generate original signed/unsigned divmod. */
19568 div = gen_divmod4_1 (operands[0], operands[1],
19569 operands[2], operands[3]);
19572 /* Branch to the end. */
19573 emit_jump_insn (gen_jump (end_label));
19576 /* Generate 8bit unsigned divide. */
19577 emit_label (qimode_label);
19578 /* Don't use operands[0] for result of 8bit divide since not all
19579 registers support QImode ZERO_EXTRACT. */
19580 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
19581 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
19582 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
19583 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
/* REG_EQUAL notes describing the computed div/mod values.  */
19587 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
19588 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
19592 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
19593 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
19596 /* Extract remainder from AH. */
19597 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
19598 if (REG_P (operands[1]))
19599 insn = emit_move_insn (operands[1], tmp1);
19602 /* Need a new scratch register since the old one has result
19604 scratch = gen_reg_rtx (mode);
19605 emit_move_insn (scratch, tmp1);
19606 insn = emit_move_insn (operands[1], scratch);
19608 set_unique_reg_note (insn, REG_EQUAL, mod);
19610 /* Zero extend quotient from AL. */
19611 tmp1 = gen_lowpart (QImode, tmp0);
19612 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
19613 set_unique_reg_note (insn, REG_EQUAL, div);
19615 emit_label (end_label);
/* LEA/AGU stall tuning knobs: maximum tolerated stall (in cycles) and
   how far (in insns) the dependence searches below will look.  */
19618 #define LEA_MAX_STALL (3)
19619 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
19621 /* Increase given DISTANCE in half-cycles according to
19622 dependencies between PREV and NEXT instructions.
19623 Add 1 half-cycle if there is no dependency and
19624 go to next cycle if there is some dependency. */
19626 static unsigned int
19627 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
19631 if (!prev || !next)
19632 return distance + (distance & 1) + 2;
/* Without dataflow info, conservatively assume no dependency.  */
19634 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
19635 return distance + 1;
/* A def in PREV feeding a use in NEXT rounds up to the next full
   cycle ((distance & 1) aligns to even) plus one cycle.  */
19637 FOR_EACH_INSN_USE (use, next)
19638 FOR_EACH_INSN_DEF (def, prev)
19639 if (!DF_REF_IS_ARTIFICIAL (def)
19640 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
19641 return distance + (distance & 1) + 2;
19643 return distance + 1;
19646 /* Function checks if instruction INSN defines register number
19647 REGNO1 or REGNO2. */
/* Returns true on a matching non-artificial def; the return statements
   themselves are on lines missing from this excerpt.  */
19650 insn_defines_reg (unsigned int regno1, unsigned int regno2,
19655 FOR_EACH_INSN_DEF (def, insn)
19656 if (DF_REF_REG_DEF_P (def)
19657 && !DF_REF_IS_ARTIFICIAL (def)
19658 && (regno1 == DF_REF_REGNO (def)
19659 || regno2 == DF_REF_REGNO (def)))
19665 /* Function checks if instruction INSN uses register number
19666 REGNO as a part of address expression. */
/* DF_REF_REG_MEM_P restricts the scan to uses inside memory addresses.  */
19669 insn_uses_reg_mem (unsigned int regno, rtx insn)
19673 FOR_EACH_INSN_USE (use, insn)
19674 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
19680 /* Search backward for non-agu definition of register number REGNO1
19681 or register number REGNO2 in basic block starting from instruction
19682 START up to head of basic block or instruction INSN.
19684 Function puts true value into *FOUND var if definition was found
19685 and false otherwise.
19687 Distance in half-cycles between START and found instruction or head
19688 of BB is added to DISTANCE and returned. */
/* NOTE(review): loop header and several branches fall on lines dropped
   from this excerpt (gaps 19697->19703, 19711->19720).  */
19691 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
19692 rtx_insn *insn, int distance,
19693 rtx_insn *start, bool *found)
19695 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
19696 rtx_insn *prev = start;
19697 rtx_insn *next = NULL;
19703 && distance < LEA_SEARCH_THRESHOLD
19705 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
19707 distance = increase_distance (prev, next, distance);
19708 if (insn_defines_reg (regno1, regno2, prev))
/* Only a non-LEA (i.e. non-AGU) definition counts as "found".  */
19710 if (recog_memoized (prev) < 0
19711 || get_attr_type (prev) != TYPE_LEA)
19720 if (prev == BB_HEAD (bb))
19723 prev = PREV_INSN (prev);
19729 /* Search backward for non-agu definition of register number REGNO1
19730 or register number REGNO2 in INSN's basic block until
19731 1. Pass LEA_SEARCH_THRESHOLD instructions, or
19732 2. Reach neighbor BBs boundary, or
19733 3. Reach agu definition.
19734 Returns the distance between the non-agu definition point and INSN.
19735 If no definition point, returns -1. */
/* Searches INSN's own BB first; if nothing found within the threshold,
   continues into predecessor BBs (special-casing a self-loop, else
   taking the minimum over all predecessors).  */
19738 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
19741 basic_block bb = BLOCK_FOR_INSN (insn);
19743 bool found = false;
19745 if (insn != BB_HEAD (bb))
19746 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
19747 distance, PREV_INSN (insn),
19750 if (!found && distance < LEA_SEARCH_THRESHOLD)
19754 bool simple_loop = false;
/* Detect a predecessor edge from bb to itself (simple loop).  */
19756 FOR_EACH_EDGE (e, ei, bb->preds)
19759 simple_loop = true;
19764 distance = distance_non_agu_define_in_bb (regno1, regno2,
19766 BB_END (bb), &found);
19769 int shortest_dist = -1;
19770 bool found_in_bb = false;
19772 FOR_EACH_EDGE (e, ei, bb->preds)
19775 = distance_non_agu_define_in_bb (regno1, regno2,
19781 if (shortest_dist < 0)
19782 shortest_dist = bb_dist;
19783 else if (bb_dist > 0)
19784 shortest_dist = MIN (bb_dist, shortest_dist);
19790 distance = shortest_dist;
19794 /* get_attr_type may modify recog data. We want to make sure
19795 that recog data is valid for instruction INSN, on which
19796 distance_non_agu_define is called. INSN is unchanged here. */
19797 extract_insn_cached (insn);
/* Distances are accumulated in half-cycles; convert to cycles.  */
19802 return distance >> 1;
19805 /* Return the distance in half-cycles between INSN and the next
19806 insn that uses register number REGNO in memory address added
19807 to DISTANCE. Return -1 if REGNO0 is set.
19809 Put true value into *FOUND if register usage was found and
19811 Put true value into *REDEFINED if register redefinition was
19812 found and false otherwise. */
/* Forward-scan counterpart of distance_non_agu_define_in_bb.
   NOTE(review): several lines are missing from this excerpt (gaps
   19832->19838, 19846->19851, 19853->19861).  */
19815 distance_agu_use_in_bb (unsigned int regno,
19816 rtx_insn *insn, int distance, rtx_insn *start,
19817 bool *found, bool *redefined)
19819 basic_block bb = NULL;
19820 rtx_insn *next = start;
19821 rtx_insn *prev = NULL;
19824 *redefined = false;
19826 if (start != NULL_RTX)
19828 bb = BLOCK_FOR_INSN (start);
19829 if (start != BB_HEAD (bb))
19830 /* If insn and start belong to the same bb, set prev to insn,
19831 so the call to increase_distance will increase the distance
19832 between insns by 1. */
19838 && distance < LEA_SEARCH_THRESHOLD)
19840 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
19842 distance = increase_distance(prev, next, distance);
19843 if (insn_uses_reg_mem (regno, next))
19845 /* Return DISTANCE if OP0 is used in memory
19846 address in NEXT. */
19851 if (insn_defines_reg (regno, INVALID_REGNUM, next))
19853 /* Return -1 if OP0 is set in NEXT. */
19861 if (next == BB_END (bb))
19864 next = NEXT_INSN (next);
19870 /* Return the distance between INSN and the next insn that uses
19871 register number REGNO0 in memory address. Return -1 if no such
19872 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
/* Mirrors distance_non_agu_define, but scans forward through successor
   BBs (self-loop special case, else minimum over successors).  */
19875 distance_agu_use (unsigned int regno0, rtx_insn *insn)
19877 basic_block bb = BLOCK_FOR_INSN (insn);
19879 bool found = false;
19880 bool redefined = false;
19882 if (insn != BB_END (bb))
19883 distance = distance_agu_use_in_bb (regno0, insn, distance,
19885 &found, &redefined);
19887 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
19891 bool simple_loop = false;
19893 FOR_EACH_EDGE (e, ei, bb->succs)
19896 simple_loop = true;
19901 distance = distance_agu_use_in_bb (regno0, insn,
19902 distance, BB_HEAD (bb),
19903 &found, &redefined);
19906 int shortest_dist = -1;
19907 bool found_in_bb = false;
19908 bool redefined_in_bb = false;
19910 FOR_EACH_EDGE (e, ei, bb->succs)
19913 = distance_agu_use_in_bb (regno0, insn,
19914 distance, BB_HEAD (e->dest),
19915 &found_in_bb, &redefined_in_bb);
19918 if (shortest_dist < 0)
19919 shortest_dist = bb_dist;
19920 else if (bb_dist > 0)
19921 shortest_dist = MIN (bb_dist, shortest_dist);
19927 distance = shortest_dist;
/* No use found, or the register was redefined first: report -1
   (the return on that path is on a line missing here).  */
19931 if (!found || redefined)
/* Half-cycles to cycles.  */
19934 return distance >> 1;
19937 /* Define this macro to tune LEA priority vs ADD, it take effect when
19938 there is a dilemma of choosing LEA or ADD
19939 Negative value: ADD is more preferred than LEA
19941 Positive value: LEA is more preferred than ADD*/
19942 #define IX86_LEA_PRIORITY 0
19944 /* Return true if usage of lea INSN has performance advantage
19945 over a sequence of instructions. Instructions sequence has
19946 SPLIT_COST cycles higher latency than lea latency. */
/* Weighs the backward non-AGU-def distance against the forward AGU-use
   distance, biased by SPLIT_COST and IX86_LEA_PRIORITY.  The Silvermont
   /Intel early-outs are partially on lines missing from this excerpt.  */
19949 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
19950 unsigned int regno2, int split_cost, bool has_scale)
19952 int dist_define, dist_use;
19954 /* For Silvermont if using a 2-source or 3-source LEA for
19955 non-destructive destination purposes, or due to wanting
19956 ability to use SCALE, the use of LEA is justified. */
19957 if (TARGET_SILVERMONT || TARGET_INTEL)
19961 if (split_cost < 1)
19963 if (regno0 == regno1 || regno0 == regno2)
19968 dist_define = distance_non_agu_define (regno1, regno2, insn);
19969 dist_use = distance_agu_use (regno0, insn);
19971 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
19973 /* If there is no non AGU operand definition, no AGU
19974 operand usage and split cost is 0 then both lea
19975 and non lea variants have same priority. Currently
19976 we prefer lea for 64 bit code and non lea on 32 bit
19978 if (dist_use < 0 && split_cost == 0)
19979 return TARGET_64BIT || IX86_LEA_PRIORITY;
19984 /* With longer definitions distance lea is more preferable.
19985 Here we change it to take into account splitting cost and
19987 dist_define += split_cost + IX86_LEA_PRIORITY;
19989 /* If there is no use in memory address then we just check
19990 that split cost exceeds AGU stall. */
19992 return dist_define > LEA_MAX_STALL;
19994 /* If this insn has both backward non-agu dependence and forward
19995 agu dependence, the one with short distance takes effect. */
19996 return dist_define >= dist_use;
19999 /* Return true if it is legal to clobber flags by INSN and
20000 false otherwise. */
/* Scans forward within the BB: FLAGS_REG is clobberable unless a later
   insn uses it before redefining it; at the BB boundary, defer to
   live-out info.  NOTE(review): the scan-loop header is on lines
   missing from this excerpt.  */
20003 ix86_ok_to_clobber_flags (rtx_insn *insn)
20005 basic_block bb = BLOCK_FOR_INSN (insn);
20011 if (NONDEBUG_INSN_P (insn))
20013 FOR_EACH_INSN_USE (use, insn)
20014 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
/* A redefinition of FLAGS_REG kills the old value, so it is safe.  */
20017 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
20021 if (insn == BB_END (bb))
20024 insn = NEXT_INSN (insn);
20027 live = df_get_live_out(bb);
20028 return !REGNO_REG_SET_P (live, FLAGS_REG);
20031 /* Return true if we need to split op0 = op1 + op2 into a sequence of
20032 move and add to avoid AGU stalls. */
20035 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
20037 unsigned int regno0, regno1, regno2;
20039 /* Check if we need to optimize. */
/* Only worth doing when tuning for AGU-stall-prone targets and
   optimizing for speed.  */
20040 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20043 /* Check it is correct to split here. */
/* The split sequence (mov + add) clobbers flags; LEA does not.  */
20044 if (!ix86_ok_to_clobber_flags(insn))
20047 regno0 = true_regnum (operands[0]);
20048 regno1 = true_regnum (operands[1]);
20049 regno2 = true_regnum (operands[2]);
20051 /* We need to split only adds with non destructive
20052 destination operand. */
20053 if (regno0 == regno1 || regno0 == regno2)
/* split_cost of 1 accounts for the extra mov.  */
20056 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
20059 /* Return true if we should emit lea instruction instead of mov
20063 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
20065 unsigned int regno0, regno1;
20067 /* Check if we need to optimize. */
20068 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20071 /* Use lea for reg to reg moves only. */
20072 if (!REG_P (operands[0]) || !REG_P (operands[1]))
20075 regno0 = true_regnum (operands[0]);
20076 regno1 = true_regnum (operands[1]);
/* split_cost 0: a plain mov is the alternative, no extra insns.  */
20078 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
20081 /* Return true if we need to split lea into a sequence of
20082 instructions to avoid AGU stalls. */
/* Decomposes the address, prices out the replacement sequence
   (mov/add/shift per component) and asks ix86_lea_outperforms whether
   keeping the LEA wins.  NOTE(review): several guard lines are missing
   from this excerpt (gaps around 20110/20113, 20125/20128, 20163/20167).  */
20085 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
20087 unsigned int regno0, regno1, regno2;
20089 struct ix86_address parts;
20092 /* Check we need to optimize. */
20093 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
20096 /* The "at least two components" test below might not catch simple
20097 move or zero extension insns if parts.base is non-NULL and parts.disp
20098 is const0_rtx as the only components in the address, e.g. if the
20099 register is %rbp or %r13. As this test is much cheaper and moves or
20100 zero extensions are the common case, do this check first. */
20101 if (REG_P (operands[1])
20102 || (SImode_address_operand (operands[1], VOIDmode)
20103 && REG_P (XEXP (operands[1], 0))))
20106 /* Check if it is OK to split here. */
20107 if (!ix86_ok_to_clobber_flags (insn))
20110 ok = ix86_decompose_address (operands[1], &parts);
20113 /* There should be at least two components in the address. */
20114 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
20115 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
20118 /* We should not split into add if non legitimate pic
20119 operand is used as displacement. */
20120 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
20123 regno0 = true_regnum (operands[0]) ;
20124 regno1 = INVALID_REGNUM;
20125 regno2 = INVALID_REGNUM;
20128 regno1 = true_regnum (parts.base);
20130 regno2 = true_regnum (parts.index);
20134 /* Compute how many cycles we will add to execution time
20135 if split lea into a sequence of instructions. */
20136 if (parts.base || parts.index)
20138 /* Have to use mov instruction if non destructive
20139 destination form is used. */
20140 if (regno1 != regno0 && regno2 != regno0)
20143 /* Have to add index to base if both exist. */
20144 if (parts.base && parts.index)
20147 /* Have to use shift and adds if scale is 2 or greater. */
20148 if (parts.scale > 1)
20150 if (regno0 != regno1)
20152 else if (regno2 == regno0)
20155 split_cost += parts.scale;
20158 /* Have to use add instruction with immediate if
20159 disp is non zero. */
20160 if (parts.disp && parts.disp != const0_rtx)
20163 /* Subtract the price of lea. */
20167 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
20171 /* Emit x86 binary operand CODE in mode MODE, where the first operand
20172 matches destination. RTX includes clobber of FLAGS_REG. */
20175 ix86_emit_binop (enum rtx_code code, machine_mode mode,
/* Builds (parallel [(set dst (code dst src)) (clobber flags)]) and emits
   it — the canonical two-operand x86 ALU pattern.  */
20180 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
20181 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20183 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
20186 /* Return true if regno1 def is nearest to the insn. */
20189 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
20191 rtx_insn *prev = insn;
20192 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
/* Walk backwards within the basic block; the first definition found of
   either register decides the answer.  */
20196 while (prev && prev != start)
/* Skip notes, debug insns and other non-insns.  */
20198 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
20200 prev = PREV_INSN (prev);
/* regno1 defined closer to INSN -> true; regno2 closer -> false.  */
20203 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
20205 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
20207 prev = PREV_INSN (prev);
20210 /* None of the regs is defined in the bb. */
20214 /* Split lea instructions into a sequence of instructions
20215 which are executed on ALU to avoid AGU stalls.
20216 It is assumed that it is allowed to clobber flags register
20217 at lea position. */
20220 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
20222 unsigned int regno0, regno1, regno2;
20223 struct ix86_address parts;
/* Decompose the LEA address operand into base/index/disp/scale.  */
20227 ok = ix86_decompose_address (operands[1], &parts);
20230 target = gen_lowpart (mode, operands[0]);
20232 regno0 = true_regnum (target);
20233 regno1 = INVALID_REGNUM;
20234 regno2 = INVALID_REGNUM;
/* Narrow each present component to MODE and record its register.  */
20238 parts.base = gen_lowpart (mode, parts.base);
20239 regno1 = true_regnum (parts.base);
20244 parts.index = gen_lowpart (mode, parts.index);
20245 regno2 = true_regnum (parts.index);
20249 parts.disp = gen_lowpart (mode, parts.disp);
20251 if (parts.scale > 1)
20253 /* Case r1 = r1 + ... */
20254 if (regno1 == regno0)
20256 /* If we have a case r1 = r1 + C * r2 then we
20257 should use multiplication which is very
20258 expensive. Assume cost model is wrong if we
20259 have such case here. */
20260 gcc_assert (regno2 != regno0);
/* Replace index * scale by SCALE repeated additions of the index.  */
20262 for (adds = parts.scale; adds > 0; adds--)
20263 ix86_emit_binop (PLUS, mode, target, parts.index);
20267 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
20268 if (regno0 != regno2)
20269 emit_insn (gen_rtx_SET (target, parts.index));
/* Scale is a power of two here (exact_log2), so shift left.  */
20271 /* Use shift for scaling. */
20272 ix86_emit_binop (ASHIFT, mode, target,
20273 GEN_INT (exact_log2 (parts.scale)));
20276 ix86_emit_binop (PLUS, mode, target, parts.base);
20278 if (parts.disp && parts.disp != const0_rtx)
20279 ix86_emit_binop (PLUS, mode, target, parts.disp);
/* No base and no index: the address is just the displacement.  */
20282 else if (!parts.base && !parts.index)
20284 gcc_assert(parts.disp);
20285 emit_insn (gen_rtx_SET (target, parts.disp));
20291 if (regno0 != regno2)
20292 emit_insn (gen_rtx_SET (target, parts.index));
20294 else if (!parts.index)
20296 if (regno0 != regno1)
20297 emit_insn (gen_rtx_SET (target, parts.base));
/* Both base and index present, scale == 1.  */
20301 if (regno0 == regno1)
20303 else if (regno0 == regno2)
20309 /* Find better operand for SET instruction, depending
20310 on which definition is farther from the insn. */
/* Copy the operand whose def is nearer; add the farther one, to give
   the out-of-order core more scheduling slack.  */
20311 if (find_nearest_reg_def (insn, regno1, regno2))
20312 tmp = parts.index, tmp1 = parts.base;
20314 tmp = parts.base, tmp1 = parts.index;
20316 emit_insn (gen_rtx_SET (target, tmp));
20318 if (parts.disp && parts.disp != const0_rtx)
20319 ix86_emit_binop (PLUS, mode, target, parts.disp);
20321 ix86_emit_binop (PLUS, mode, target, tmp1);
20325 ix86_emit_binop (PLUS, mode, target, tmp);
20328 if (parts.disp && parts.disp != const0_rtx)
20329 ix86_emit_binop (PLUS, mode, target, parts.disp);
20333 /* Return true if it is ok to optimize an ADD operation to LEA
20334 operation to avoid flag register consumption. For most processors,
20335 ADD is faster than LEA. For the processors like BONNELL, if the
20336 destination register of LEA holds an actual address which will be
20337 used soon, LEA is better and otherwise ADD is better. */
20340 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
20342 unsigned int regno0 = true_regnum (operands[0]);
20343 unsigned int regno1 = true_regnum (operands[1]);
20344 unsigned int regno2 = true_regnum (operands[2]);
/* Three-operand form cannot be expressed as a two-operand ADD.  */
20346 /* If a = b + c, (a!=b && a!=c), must use lea form. */
20347 if (regno0 != regno1 && regno0 != regno2)
20350 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
/* Otherwise ask the cost model whether LEA wins at this insn.  */
20353 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
20356 /* Return true if destination reg of SET_BODY is shift count of
20360 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
20366 /* Retrieve destination of SET_BODY. */
20367 switch (GET_CODE (set_body))
20370 set_dest = SET_DEST (set_body);
20371 if (!set_dest || !REG_P (set_dest))
/* PARALLEL: recurse into each element of SET_BODY.  */
20375 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
20376 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
20384 /* Retrieve shift count of USE_BODY. */
20385 switch (GET_CODE (use_body))
20388 shift_rtx = XEXP (use_body, 1);
/* PARALLEL: recurse into each element of USE_BODY.  */
20391 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
20392 if (ix86_dep_by_shift_count_body (set_body,
20393 XVECEXP (use_body, 0, i)))
/* Only shift/rotate operations have a shift count to inspect.  */
20401 && (GET_CODE (shift_rtx) == ASHIFT
20402 || GET_CODE (shift_rtx) == LSHIFTRT
20403 || GET_CODE (shift_rtx) == ASHIFTRT
20404 || GET_CODE (shift_rtx) == ROTATE
20405 || GET_CODE (shift_rtx) == ROTATERT))
20407 rtx shift_count = XEXP (shift_rtx, 1);
20409 /* Return true if shift count is dest of SET_BODY. */
20410 if (REG_P (shift_count))
20412 /* Add check since it can be invoked before register
20413 allocation in pre-reload schedule. */
20414 if (reload_completed
20415 && true_regnum (set_dest) == true_regnum (shift_count))
20417 else if (REGNO(set_dest) == REGNO(shift_count))
20425 /* Return true if destination reg of SET_INSN is shift count of
/* Thin wrapper: compare the PATTERNs of the two insns.  */
20429 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
20431 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
20432 PATTERN (use_insn));
20435 /* Return TRUE or FALSE depending on whether the unary operator meets the
20436 appropriate constraints. */
20439 ix86_unary_operator_ok (enum rtx_code,
20443 /* If one of operands is memory, source and destination must match. */
20444 if ((MEM_P (operands[0])
20445 || MEM_P (operands[1]))
20446 && ! rtx_equal_p (operands[0], operands[1]))
20451 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
20452 are ok, keeping in mind the possible movddup alternative. */
20455 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
/* Memory destination: must be identical to the operand selected by
   HIGH (operand 1 for low, operand 2 for high).  */
20457 if (MEM_P (operands[0]))
20458 return rtx_equal_p (operands[0], operands[1 + high]);
/* Two memory sources are only OK when movddup (SSE3) can load the
   duplicated value, i.e. both sources are the same location.  */
20459 if (MEM_P (operands[1]) && MEM_P (operands[2]))
20460 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
20464 /* Post-reload splitter for converting an SF or DFmode value in an
20465 SSE register into an unsigned SImode. */
20468 ix86_split_convert_uns_si_sse (rtx operands[])
20470 machine_mode vecmode;
20471 rtx value, large, zero_or_two31, input, two31, x;
20473 large = operands[1];
20474 zero_or_two31 = operands[2];
20475 input = operands[3];
20476 two31 = operands[4];
20477 vecmode = GET_MODE (large);
20478 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
20480 /* Load up the value into the low element. We must ensure that the other
20481 elements are valid floats -- zero is the easiest such value. */
20484 if (vecmode == V4SFmode)
20485 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
20487 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
/* Alternative path: INPUT already lives in a vector register; zero the
   destination and move the scalar in with movss/movsd.  */
20491 input = gen_rtx_REG (vecmode, REGNO (input));
20492 emit_move_insn (value, CONST0_RTX (vecmode));
20493 if (vecmode == V4SFmode)
20494 emit_insn (gen_sse_movss (value, value, input));
20496 emit_insn (gen_sse2_movsd (value, value, input));
20499 emit_move_insn (large, two31);
20500 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
/* large = (2**31 <= value) comparison mask.  */
20502 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
20503 emit_insn (gen_rtx_SET (large, x));
/* zero_or_two31 = mask ? 2**31 : 0.  */
20505 x = gen_rtx_AND (vecmode, zero_or_two31, large);
20506 emit_insn (gen_rtx_SET (zero_or_two31, x));
/* Bias large values down into signed range before truncation.  */
20508 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
20509 emit_insn (gen_rtx_SET (value, x));
/* Turn the mask into 0x80000000 by shifting bit 31 in.  */
20511 large = gen_rtx_REG (V4SImode, REGNO (large));
20512 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
20514 x = gen_rtx_REG (V4SImode, REGNO (value));
20515 if (vecmode == V4SFmode)
20516 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
20518 emit_insn (gen_sse2_cvttpd2dq (x, value));
/* XOR the sign bit back in for values that were biased.  */
20521 emit_insn (gen_xorv4si3 (value, value, large));
20524 /* Convert an unsigned DImode value into a DFmode, using only SSE.
20525 Expects the 64-bit DImode to be supplied in a pair of integral
20526 registers. Requires SSE2; will use SSE3 if available. For x86_32,
20527 -mfpmath=sse, !optimize_size only. */
20530 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
20532 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
20533 rtx int_xmm, fp_xmm;
20534 rtx biases, exponents;
/* Get the 64-bit input into the low half of an XMM register, choosing
   the cheapest path the target supports.  */
20537 int_xmm = gen_reg_rtx (V4SImode);
20538 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
20539 emit_insn (gen_movdi_to_sse (int_xmm, input));
20540 else if (TARGET_SSE_SPLIT_REGS)
20542 emit_clobber (int_xmm);
20543 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
20547 x = gen_reg_rtx (V2DImode);
20548 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
20549 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
/* Constant vector of the two DF exponent words (0x1.0p52 / 0x1.0p84
   high words) to be interleaved with the input halves.  */
20552 x = gen_rtx_CONST_VECTOR (V4SImode,
20553 gen_rtvec (4, GEN_INT (0x43300000UL),
20554 GEN_INT (0x45300000UL),
20555 const0_rtx, const0_rtx));
20556 exponents = validize_mem (force_const_mem (V4SImode, x));
20558 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
20559 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents))
20561 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
20562 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
20563 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
20564 (0x1.0p84 + double(fp_value_hi_xmm)).
20565 Note these exponents differ by 32. */
20567 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
20569 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
20570 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
20571 real_ldexp (&bias_lo_rvt, &dconst1, 52);
20572 real_ldexp (&bias_hi_rvt, &dconst1, 84);
20573 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
20574 x = const_double_from_real_value (bias_hi_rvt, DFmode);
20575 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
20576 biases = validize_mem (force_const_mem (V2DFmode, biases));
20577 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
20579 /* Add the upper and lower DFmode values together. */
/* SSE3 haddpd does this in one insn; otherwise interleave-high and add.  */
20581 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
20584 x = copy_to_mode_reg (V2DFmode, fp_xmm);
20585 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
20586 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
20589 ix86_expand_vector_extract (false, target, fp_xmm, 0);
20592 /* Not used, but eases macroization of patterns. */
20594 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
20596 gcc_unreachable ();
20599 /* Convert an unsigned SImode value into a DFmode. Only currently used
20600 for SSE, but applicable anywhere. */
20603 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
20605 REAL_VALUE_TYPE TWO31r;
/* x = input - 2**31 (as a signed value), computed without overflow UB
   by adding GEN_INT (-2147483647 - 1).  */
20608 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
20609 NULL, 1, OPTAB_DIRECT);
/* Signed SI -> DF conversion of the biased value.  */
20611 fp = gen_reg_rtx (DFmode);
20612 emit_insn (gen_floatsidf2 (fp, x));
/* Add the 2**31 bias back in DFmode, where it is exactly representable.  */
20614 real_ldexp (&TWO31r, &dconst1, 31);
20615 x = const_double_from_real_value (TWO31r, DFmode);
20617 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
20619 emit_move_insn (target, x);
20622 /* Convert a signed DImode value into a DFmode. Only used for SSE in
20623 32-bit mode; otherwise we have a direct convert instruction. */
20626 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
20628 REAL_VALUE_TYPE TWO32r;
20629 rtx fp_lo, fp_hi, x;
20631 fp_lo = gen_reg_rtx (DFmode);
20632 fp_hi = gen_reg_rtx (DFmode);
/* fp_hi = double (signed high word) * 2**32.  */
20634 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
20636 real_ldexp (&TWO32r, &dconst1, 32);
20637 x = const_double_from_real_value (TWO32r, DFmode);
20638 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
/* fp_lo = double (unsigned low word); then sum the two halves.  */
20640 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
20642 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
20645 emit_move_insn (target, x);
20648 /* Convert an unsigned SImode value into a SFmode, using only SSE.
20649 For x86_32, -mfpmath=sse, !optimize_size only. */
20651 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
20653 REAL_VALUE_TYPE ONE16r;
20654 rtx fp_hi, fp_lo, int_hi, int_lo, x;
/* Split input into 16-bit halves so each converts exactly to float,
   then recombine as fp_hi * 2**16 + fp_lo.  */
20656 real_ldexp (&ONE16r, &dconst1, 16);
20657 x = const_double_from_real_value (ONE16r, SFmode);
20658 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
20659 NULL, 0, OPTAB_DIRECT);
20660 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
20661 NULL, 0, OPTAB_DIRECT);
20662 fp_hi = gen_reg_rtx (SFmode);
20663 fp_lo = gen_reg_rtx (SFmode);
20664 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
20665 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
20666 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
20668 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
20670 if (!rtx_equal_p (target, fp_hi))
20671 emit_move_insn (target, fp_hi);
20674 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
20675 a vector of unsigned ints VAL to vector of floats TARGET. */
20678 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
20681 REAL_VALUE_TYPE TWO16r;
20682 machine_mode intmode = GET_MODE (val);
20683 machine_mode fltmode = GET_MODE (target);
20684 rtx (*cvt) (rtx, rtx);
/* Pick the signed-convert insn matching the vector width.  */
20686 if (intmode == V4SImode)
20687 cvt = gen_floatv4siv4sf2;
20689 cvt = gen_floatv8siv8sf2;
/* Same trick as the scalar version: split each lane into 16-bit halves
   (tmp[1] = low, tmp[2] = high), convert both signed, then combine as
   high * 2**16 + low.  */
20690 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
20691 tmp[0] = force_reg (intmode, tmp[0]);
20692 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
20694 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
20695 NULL_RTX, 1, OPTAB_DIRECT);
20696 tmp[3] = gen_reg_rtx (fltmode);
20697 emit_insn (cvt (tmp[3], tmp[1]));
20698 tmp[4] = gen_reg_rtx (fltmode);
20699 emit_insn (cvt (tmp[4], tmp[2]));
20700 real_ldexp (&TWO16r, &dconst1, 16);
20701 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
20702 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
20703 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
20705 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
20707 if (tmp[7] != target)
20708 emit_move_insn (target, tmp[7]);
20711 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
20712 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
20713 This is done by doing just signed conversion if < 0x1p31, and otherwise by
20714 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
20717 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
20719 REAL_VALUE_TYPE TWO31r;
20720 rtx two31r, tmp[4];
20721 machine_mode mode = GET_MODE (val);
20722 machine_mode scalarmode = GET_MODE_INNER (mode);
20723 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
20724 rtx (*cmp) (rtx, rtx, rtx, rtx);
20727 for (i = 0; i < 3; i++)
20728 tmp[i] = gen_reg_rtx (mode);
/* Build a vector with 2**31 replicated into every lane.  */
20729 real_ldexp (&TWO31r, &dconst1, 31);
20730 two31r = const_double_from_real_value (TWO31r, scalarmode);
20731 two31r = ix86_build_const_vector (mode, 1, two31r);
20732 two31r = force_reg (mode, two31r);
20735 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
20736 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
20737 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
20738 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
20739 default: gcc_unreachable ();
/* tmp[0] = (2**31 <= val) mask; tmp[1] = mask ? 2**31 : 0.  */
20741 tmp[3] = gen_rtx_LE (mode, two31r, val);
20742 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
20743 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
/* *XORP = 0x80000000 in each lane where the value was biased; derive it
   either by shifting the mask or by ANDing with a constant.  */
20745 if (intmode == V4SImode || TARGET_AVX2)
20746 *xorp = expand_simple_binop (intmode, ASHIFT,
20747 gen_lowpart (intmode, tmp[0]),
20748 GEN_INT (31), NULL_RTX, 0,
20752 rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
20753 two31 = ix86_build_const_vector (intmode, 1, two31);
20754 *xorp = expand_simple_binop (intmode, AND,
20755 gen_lowpart (intmode, tmp[0]),
20756 two31, NULL_RTX, 0,
/* Return val - (mask ? 2**31 : 0).  */
20759 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
20763 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
20764 then replicate the value for all elements of the vector
20768 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
20772 machine_mode scalar_mode;
/* Build an N-element CONST_VECTOR: VALUE in element 0, and either VALUE
   (vect) or zero (!vect) in the remaining elements.  */
20795 n_elt = GET_MODE_NUNITS (mode);
20796 v = rtvec_alloc (n_elt);
20797 scalar_mode = GET_MODE_INNER (mode);
20799 RTVEC_ELT (v, 0) = value;
20801 for (i = 1; i < n_elt; ++i)
20802 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
20804 return gen_rtx_CONST_VECTOR (mode, v);
20807 gcc_unreachable ();
20811 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
20812 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
20813 for an SSE register. If VECT is true, then replicate the mask for
20814 all elements of the vector register. If INVERT is true, then create
20815 a mask excluding the sign bit. */
20818 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
20820 machine_mode vec_mode, imode;
/* NOTE(review): the mode switch that sets vec_mode/imode is elided in
   this listing (source lines 20821-20852).  */
20848 vec_mode = VOIDmode;
20853 gcc_unreachable ();
/* w = a wide_int with only the sign bit of the element mode set;
   inverted when INVERT, giving a mask of everything but the sign bit.  */
20856 machine_mode inner_mode = GET_MODE_INNER (mode);
20857 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
20858 GET_MODE_BITSIZE (inner_mode));
20860 w = wi::bit_not (w);
20862 /* Force this value into the low part of a fp vector constant. */
20863 mask = immed_wide_int_const (w, imode);
20864 mask = gen_lowpart (inner_mode, mask);
20866 if (vec_mode == VOIDmode)
20867 return force_reg (inner_mode, mask);
20869 v = ix86_build_const_vector (vec_mode, vect, mask);
20870 return force_reg (vec_mode, v);
20873 /* Generate code for floating point ABS or NEG. */
20876 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
20879 rtx mask, set, dst, src;
20880 bool use_sse = false;
20881 bool vector_mode = VECTOR_MODE_P (mode);
20882 machine_mode vmode = mode;
/* Decide whether SSE is used and which vector mode carries the mask
   (the assignments for SFmode/DFmode are elided in this listing).  */
20886 else if (mode == TFmode)
20888 else if (TARGET_SSE_MATH)
20890 use_sse = SSE_FLOAT_MODE_P (mode);
20891 if (mode == SFmode)
20893 else if (mode == DFmode)
20897 /* NEG and ABS performed with SSE use bitwise mask operations.
20898 Create the appropriate mask now. */
/* ABS clears the sign bit (inverted mask); NEG flips it.  */
20900 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
20907 set = gen_rtx_fmt_e (code, mode, src);
20908 set = gen_rtx_SET (dst, set);
/* Vector form keeps the mask alive via USE; scalar form additionally
   clobbers the flags register.  */
20915 use = gen_rtx_USE (VOIDmode, mask);
20917 par = gen_rtvec (2, set, use);
20920 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20921 par = gen_rtvec (3, set, use, clob);
20923 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
20929 /* Expand a copysign operation. Special case operand 0 being a constant. */
20932 ix86_expand_copysign (rtx operands[])
20934 machine_mode mode, vmode;
20935 rtx dest, op0, op1, mask, nmask;
20937 dest = operands[0];
20941 mode = GET_MODE (dest);
/* vmode selection for SFmode/DFmode is elided in this listing.  */
20943 if (mode == SFmode)
20945 else if (mode == DFmode)
/* Constant magnitude: strip its sign (copysign ignores it) and use the
   single-mask "const" pattern.  */
20950 if (CONST_DOUBLE_P (op0))
20952 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
20954 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
20955 op0 = simplify_unary_operation (ABS, mode, op0, mode);
20957 if (mode == SFmode || mode == DFmode)
20959 if (op0 == CONST0_RTX (mode))
20960 op0 = CONST0_RTX (vmode)
20963 rtx v = ix86_build_const_vector (vmode, false, op0);
20965 op0 = force_reg (vmode, v);
20968 else if (op0 != CONST0_RTX (mode))
20969 op0 = force_reg (mode, op0);
/* Sign-bit mask (not inverted, not replicated).  */
20971 mask = ix86_build_signbit_mask (vmode, 0, 0);
20973 if (mode == SFmode)
20974 copysign_insn = gen_copysignsf3_const;
20975 else if (mode == DFmode)
20976 copysign_insn = gen_copysigndf3_const;
20978 copysign_insn = gen_copysigntf3_const;
20980 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable magnitude: needs both the sign mask and its complement.  */
20984 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
20986 nmask = ix86_build_signbit_mask (vmode, 0, 1);
20987 mask = ix86_build_signbit_mask (vmode, 0, 0);
20989 if (mode == SFmode)
20990 copysign_insn = gen_copysignsf3_var;
20991 else if (mode == DFmode)
20992 copysign_insn = gen_copysigndf3_var;
20994 copysign_insn = gen_copysigntf3_var;
20996 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
21000 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
21001 be a constant, and so has already been expanded into a vector constant. */
21004 ix86_split_copysign_const (rtx operands[])
21006 machine_mode mode, vmode;
21007 rtx dest, op0, mask, x;
21009 dest = operands[0];
21011 mask = operands[3];
21013 mode = GET_MODE (dest);
21014 vmode = GET_MODE (mask);
/* dest = (sign_source & signbit_mask) | constant_magnitude.  */
21016 dest = simplify_gen_subreg (vmode, dest, mode, 0);
21017 x = gen_rtx_AND (vmode, dest, mask);
21018 emit_insn (gen_rtx_SET (dest, x));
/* ORing in zero is a no-op, so skip it for a zero magnitude.  */
21020 if (op0 != CONST0_RTX (vmode))
21022 x = gen_rtx_IOR (vmode, dest, op0);
21023 emit_insn (gen_rtx_SET (dest, x));
21027 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
21028 so we have to do two masks. */
21031 ix86_split_copysign_var (rtx operands[])
21033 machine_mode mode, vmode;
21034 rtx dest, scratch, op0, op1, mask, nmask, x;
21036 dest = operands[0];
21037 scratch = operands[1];
21040 nmask = operands[4];
21041 mask = operands[5];
21043 mode = GET_MODE (dest);
21044 vmode = GET_MODE (mask);
21046 if (rtx_equal_p (op0, op1))
21048 /* Shouldn't happen often (it's useless, obviously), but when it does
21049 we'd generate incorrect code if we continue below. */
21050 emit_move_insn (dest, op0);
/* The remaining cases mirror the insn's register-allocation
   alternatives; asserts document which operands must share a reg.  */
21054 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
21056 gcc_assert (REGNO (op1) == REGNO (scratch));
21058 x = gen_rtx_AND (vmode, scratch, mask);
21059 emit_insn (gen_rtx_SET (scratch, x));
21062 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
21063 x = gen_rtx_NOT (vmode, dest);
21064 x = gen_rtx_AND (vmode, x, op0);
21065 emit_insn (gen_rtx_SET (dest, x));
21069 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
21071 x = gen_rtx_AND (vmode, scratch, mask);
21073 else /* alternative 2,4 */
21075 gcc_assert (REGNO (mask) == REGNO (scratch));
21076 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
21077 x = gen_rtx_AND (vmode, scratch, op1);
21079 emit_insn (gen_rtx_SET (scratch, x));
21081 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
21083 dest = simplify_gen_subreg (vmode, op0, mode, 0);
21084 x = gen_rtx_AND (vmode, dest, nmask);
21086 else /* alternative 3,4 */
21088 gcc_assert (REGNO (nmask) == REGNO (dest));
21090 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
21091 x = gen_rtx_AND (vmode, dest, op0);
21093 emit_insn (gen_rtx_SET (dest, x));
/* Combine magnitude and sign: dest |= scratch.  */
21096 x = gen_rtx_IOR (vmode, dest, scratch);
21097 emit_insn (gen_rtx_SET (dest, x));
21100 /* Return TRUE or FALSE depending on whether the first SET in INSN
21101 has source and destination with matching CC modes, and that the
21102 CC mode is at least as constrained as REQ_MODE. */
21105 ix86_match_ccmode (rtx insn, machine_mode req_mode)
21108 machine_mode set_mode;
/* Look at the first SET of the (possibly PARALLEL) pattern; it must be
   a COMPARE feeding a CC register.  */
21110 set = PATTERN (insn);
21111 if (GET_CODE (set) == PARALLEL)
21112 set = XVECEXP (set, 0, 0);
21113 gcc_assert (GET_CODE (set) == SET);
21114 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
21116 set_mode = GET_MODE (SET_DEST (set));
/* Per-mode compatibility rules (the switch cases' accept branches are
   elided in this listing).  */
21120 if (req_mode != CCNOmode
21121 && (req_mode != CCmode
21122 || XEXP (SET_SRC (set), 1) != const0_rtx))
21126 if (req_mode == CCGCmode)
21130 if (req_mode == CCGOCmode || req_mode == CCNOmode)
21134 if (req_mode == CCZmode)
21145 if (set_mode != req_mode)
21150 gcc_unreachable ();
21153 return GET_MODE (SET_SRC (set)) == set_mode;
21156 /* Generate insn patterns to do an integer compare of OPERANDS. */
21159 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
21161 machine_mode cmpmode;
/* Pick the least constrained CC mode that still supports CODE.  */
21164 cmpmode = SELECT_CC_MODE (code, op0, op1);
21165 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
21167 /* This is very simple, but making the interface the same as in the
21168 FP case makes the rest of the code easier. */
21169 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
21170 emit_insn (gen_rtx_SET (flags, tmp));
21172 /* Return the test that should be put into the flags user, i.e.
21173 the bcc, scc, or cmov instruction. */
21174 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
21177 /* Figure out whether to use ordered or unordered fp comparisons.
21178 Return the appropriate mode to use. */
21181 ix86_fp_compare_mode (enum rtx_code)
21183 /* ??? In order to make all comparisons reversible, we do all comparisons
21184 non-trapping when compiling for IEEE. Once gcc is able to distinguish
21185 all forms trapping and nontrapping comparisons, we can make inequality
21186 comparisons trapping again, since it results in better code when using
21187 FCOM based compares. */
21188 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the CC mode that captures exactly the flags CODE needs for
   comparing OP0 against OP1.  */
21192 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
21194 machine_mode mode = GET_MODE (op0);
21196 if (SCALAR_FLOAT_MODE_P (mode))
21198 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
21199 return ix86_fp_compare_mode (code);
21204 /* Only zero flag is needed. */
21205 case EQ: /* ZF=0 */
21206 case NE: /* ZF!=0 */
21208 /* Codes needing carry flag. */
21209 case GEU: /* CF=0 */
21210 case LTU: /* CF=1 */
21211 /* Detect overflow checks. They need just the carry flag. */
/* Pattern (a + b) cmp a / (a + b) cmp b — the canonical overflow test.  */
21212 if (GET_CODE (op0) == PLUS
21213 && (rtx_equal_p (op1, XEXP (op0, 0))
21214 || rtx_equal_p (op1, XEXP (op0, 1))))
21218 case GTU: /* CF=0 & ZF=0 */
21219 case LEU: /* CF=1 | ZF=1 */
21221 /* Codes possibly doable only with sign flag when
21222 comparing against zero. */
21223 case GE: /* SF=OF or SF=0 */
21224 case LT: /* SF<>OF or SF=1 */
21225 if (op1 == const0_rtx)
21228 /* For other cases Carry flag is not required. */
21230 /* Codes doable only with sign flag when comparing
21231 against zero, but we miss jump instruction for it
21232 so we need to use relational tests against overflow
21233 that thus needs to be zero. */
21234 case GT: /* ZF=0 & SF=OF */
21235 case LE: /* ZF=1 | SF<>OF */
21236 if (op1 == const0_rtx)
21240 /* strcmp pattern do (use flags) and combine may ask us for proper
21245 gcc_unreachable ();
21249 /* Return the fixed registers used for condition codes. */
/* Reports FLAGS_REG (and FPSR) to the target hook; the assignments to
   *p1/*p2 are elided in this listing.  */
21252 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
21259 /* If two condition code modes are compatible, return a condition code
21260 mode which is compatible with both. Otherwise, return
21263 static machine_mode
21264 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
/* Non-CC modes or an exact match are handled first (elided lines).  */
21269 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
/* CCGC and CCGOC merge to their common subset.  */
21272 if ((m1 == CCGCmode && m2 == CCGOCmode)
21273 || (m1 == CCGOCmode && m2 == CCGCmode))
/* CCZ is a subset of both CCGC and CCGOC.  */
21276 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
21278 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
21284 gcc_unreachable ();
21316 /* These are only compatible with themselves, which we already
21323 /* Return a comparison we can do and that it is equivalent to
21324 swap_condition (code) apart possibly from orderedness.
21325 But, never change orderedness if TARGET_IEEE_FP, returning
21326 UNKNOWN in that case if necessary. */
21328 static enum rtx_code
21329 ix86_fp_swap_condition (enum rtx_code code)
/* The four cases below would change ordered <-> unordered when swapped,
   which IEEE mode forbids.  */
21333 case GT: /* GTU - CF=0 & ZF=0 */
21334 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
21335 case GE: /* GEU - CF=0 */
21336 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
21337 case UNLT: /* LTU - CF=1 */
21338 return TARGET_IEEE_FP ? UNKNOWN : GT;
21339 case UNLE: /* LEU - CF=1 | ZF=1 */
21340 return TARGET_IEEE_FP ? UNKNOWN : GE;
21342 return swap_condition (code);
21346 /* Return cost of comparison CODE using the best strategy for performance.
21347 All following functions do use number of instructions as a cost metrics.
21348 In future this should be tweaked to compute bytes for optimize_size and
21349 take into account performance of various instructions on various CPUs. */
21352 ix86_fp_comparison_cost (enum rtx_code code)
21356 /* The cost of code using bit-twiddling on %ah. */
/* arith_cost is the %ah bit-twiddling cost; IEEE handling adds insns.  */
21373 arith_cost = TARGET_IEEE_FP ? 5 : 4;
21377 arith_cost = TARGET_IEEE_FP ? 6 : 4;
21380 gcc_unreachable ();
21383 switch (ix86_fp_comparison_strategy (code))
21385 case IX86_FPCMP_COMI:
21386 return arith_cost > 4 ? 3 : 2;
21387 case IX86_FPCMP_SAHF:
21388 return arith_cost > 4 ? 4 : 3;
21394 /* Return strategy to use for floating-point. We assume that fcomi is always
21395 preferable where available, since that is also true when looking at size
21396 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
21398 enum ix86_fpcmp_strategy
21399 ix86_fp_comparison_strategy (enum rtx_code)
21401 /* Do fcomi/sahf based test when profitable. */
21404 return IX86_FPCMP_COMI;
21406 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
21407 return IX86_FPCMP_SAHF;
/* Fall back to fnstsw + arithmetic on %ah.  */
21409 return IX86_FPCMP_ARITH;
21412 /* Swap, force into registers, or otherwise massage the two operands
21413 to a fp comparison. The operands are updated in place; the new
21414 comparison code is returned. */
21416 static enum rtx_code
21417 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
21419 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
21420 rtx op0 = *pop0, op1 = *pop1;
21421 machine_mode op_mode = GET_MODE (op0);
/* SSE math comparisons have different operand constraints than x87 ones;
   is_sse selects that path below (selection lines elided in this excerpt).  */
21422 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
21424 /* All of the unordered compare instructions only work on registers.
21425 The same is true of the fcomi compare instructions. The XFmode
21426 compare instructions require registers except when comparing
21427 against zero or when converting operand 1 from fixed point to
21431 && (fpcmp_mode == CCFPUmode
21432 || (op_mode == XFmode
21433 && ! (standard_80387_constant_p (op0) == 1
21434 || standard_80387_constant_p (op1) == 1)
21435 && GET_CODE (op1) != FLOAT)
21436 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
/* Register-only cases: load both operands into registers.  */
21438 op0 = force_reg (op_mode, op0);
21439 op1 = force_reg (op_mode, op1);
21443 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
21444 things around if they appear profitable, otherwise force op0
21445 into a register. */
21447 if (standard_80387_constant_p (op0) == 0
21449 && ! (standard_80387_constant_p (op1) == 0
/* Swapping operands requires the reversed condition to be representable;
   ix86_fp_swap_condition returns UNKNOWN when it is not.  */
21452 enum rtx_code new_code = ix86_fp_swap_condition (code);
21453 if (new_code != UNKNOWN)
21455 std::swap (op0, op1);
21461 op0 = force_reg (op_mode, op0);
21463 if (CONSTANT_P (op1))
/* Non-special x87 constants go to the constant pool so they can be a
   memory operand of the compare.  */
21465 int tmp = standard_80387_constant_p (op1);
21467 op1 = validize_mem (force_const_mem (op_mode, op1));
21471 op1 = force_reg (op_mode, op1);
21474 op1 = force_reg (op_mode, op1);
21478 /* Try to rearrange the comparison to make it cheaper. */
21479 if (ix86_fp_comparison_cost (code)
21480 > ix86_fp_comparison_cost (swap_condition (code))
21481 && (REG_P (op1) || can_create_pseudo_p ()))
21483 std::swap (op0, op1);
21484 code = swap_condition (code);
21486 op0 = force_reg (op_mode, op0);
21494 /* Convert comparison codes we use to represent FP comparison to integer
21495 code that will result in proper branch. Return UNKNOWN if no such code
/* NOTE(review): the function body (a switch over rtx codes, per the header
   comment) is elided from this excerpt; only the signature is visible.  */
21499 ix86_fp_compare_code_to_integer (enum rtx_code code)
21528 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* Emits the compare sequence for one of the three strategies returned by
   ix86_fp_comparison_strategy and returns an rtx testing FLAGS_REG in the
   appropriate CC mode for the flag user (bcc/scc/cmov).  */
21531 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
21533 machine_mode fpcmp_mode, intcmp_mode;
21536 fpcmp_mode = ix86_fp_compare_mode (code);
21537 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
21539 /* Do fcomi/sahf based test when profitable. */
21540 switch (ix86_fp_comparison_strategy (code))
/* COMI: fcomi sets EFLAGS directly, no scratch needed.  */
21542 case IX86_FPCMP_COMI:
21543 intcmp_mode = fpcmp_mode;
21544 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
21545 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
/* SAHF: compare, then fnstsw into a scratch and sahf copies AH to EFLAGS;
   the scratch is clobbered as part of the parallel.  */
21549 case IX86_FPCMP_SAHF:
21550 intcmp_mode = fpcmp_mode;
21551 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
21552 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
21555 scratch = gen_reg_rtx (HImode);
21556 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
21557 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
21560 case IX86_FPCMP_ARITH:
21561 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
21562 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
21563 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW)
21565 scratch = gen_reg_rtx (HImode);
21566 emit_insn (gen_rtx_SET (scratch, tmp2));
21568 /* In the unordered case, we have to check C2 for NaN's, which
21569 doesn't happen to work out to anything nice combination-wise.
21570 So do some bit twiddling on the value we've got in AH to come
21571 up with an appropriate set of condition codes. */
/* The 0x45/0x44/0x40/0x05/0x04/0x01 masks below select FPU status word
   bits C0/C2/C3 after fnstsw; each per-code branch rewrites CODE into
   an integer condition on those bits.  NOTE(review): the switch over
   CODE that these branches belong to is elided from this excerpt.  */
21573 intcmp_mode = CCNOmode;
21578 if (code == GT || !TARGET_IEEE_FP)
21580 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
21585 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21586 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
21587 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
21588 intcmp_mode = CCmode;
21594 if (code == LT && TARGET_IEEE_FP)
21596 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21597 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
21598 intcmp_mode = CCmode;
21603 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
21609 if (code == GE || !TARGET_IEEE_FP)
21611 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
21616 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21617 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
21623 if (code == LE && TARGET_IEEE_FP)
21625 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21626 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
21627 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
21628 intcmp_mode = CCmode;
21633 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
21639 if (code == EQ && TARGET_IEEE_FP)
21641 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21642 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
21643 intcmp_mode = CCmode;
21648 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
21654 if (code == NE && TARGET_IEEE_FP)
21656 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21657 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
21663 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
21669 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
21673 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
21678 gcc_unreachable ();
21686 /* Return the test that should be put into the flags user, i.e.
21687 the bcc, scc, or cmov instruction. */
21688 return gen_rtx_fmt_ee (code, VOIDmode,
21689 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Central comparison dispatcher: build the rtx test for CODE on OP0/OP1,
   routing to the FP expander for scalar float modes, the integer expander
   otherwise, or using the operands directly when OP0 is already a CC-mode
   flags value.  */
21694 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
21698 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
21699 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
21701 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
/* Decimal float comparisons are not handled by this back-end path.  */
21703 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
21704 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
21707 ret = ix86_expand_int_compare (code, op0, op1);
/* Emit a conditional branch to LABEL for CODE applied to OP0/OP1.
   Handles: vector-int equality via ptest, plain single-word compares,
   and double-word (DImode on 32-bit / TImode on 64-bit) compares split
   into word-sized compares plus up to three branches.
   NOTE(review): the mode switch skeleton is elided from this excerpt;
   comments below mark where each case begins.  */
21713 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
21715 machine_mode mode = GET_MODE (op0);
21718 /* Handle special case - vector comparsion with boolean result, transform
21719 it using ptest instruction. */
21720 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21722 rtx flag = gen_rtx_REG (CCZmode, FLAGS_REG);
21723 machine_mode p_mode = GET_MODE_SIZE (mode) == 32 ? V4DImode : V2DImode;
21725 gcc_assert (code == EQ || code == NE);
21726 /* Generate XOR since we can't check that one operand is zero vector. */
21727 tmp = gen_reg_rtx (mode);
21728 emit_insn (gen_rtx_SET (tmp, gen_rtx_XOR (mode, op0, op1)));
21729 tmp = gen_lowpart (p_mode, tmp);
/* ptest tmp,tmp sets ZF iff the xor result is all-zero, i.e. op0 == op1.  */
21730 emit_insn (gen_rtx_SET (gen_rtx_REG (CCmode, FLAGS_REG),
21731 gen_rtx_UNSPEC (CCmode,
21732 gen_rtvec (2, tmp, tmp),
21734 tmp = gen_rtx_fmt_ee (code, VOIDmode, flag, const0_rtx);
21735 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21736 gen_rtx_LABEL_REF (VOIDmode, label),
21738 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
/* Single-word case: expand the compare, then a jump on the result.  */
21751 tmp = ix86_expand_compare (code, op0, op1);
21752 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21753 gen_rtx_LABEL_REF (VOIDmode, label),
21755 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
21761 /* For 32-bit target DI comparison may be performed on
21762 SSE registers. To allow this we should avoid split
21763 to SI mode which is achieved by doing xor in DI mode
21764 and then comparing with zero (which is recognized by
21765 STV pass). We don't compare using xor when optimizing
21767 if (!optimize_insn_for_size_p ()
21769 && (code == EQ || code == NE))
21771 op0 = force_reg (mode, gen_rtx_XOR (mode, op0, op1));
21775 /* Expand DImode branch into multiple compare+branch. */
21778 rtx_code_label *label2;
21779 enum rtx_code code1, code2, code3;
21780 machine_mode submode;
/* Canonicalize so any constant is the second operand.  */
21782 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
21784 std::swap (op0, op1);
21785 code = swap_condition (code);
21788 split_double_mode (mode, &op0, 1, lo+0, hi+0);
21789 split_double_mode (mode, &op1, 1, lo+1, hi+1);
21791 submode = mode == DImode ? SImode : DImode;
21793 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
21794 avoid two branches. This costs one extra insn, so disable when
21795 optimizing for size. */
21797 if ((code == EQ || code == NE)
21798 && (!optimize_insn_for_size_p ()
21799 || hi[1] == const0_rtx || lo[1] == const0_rtx))
21804 if (hi[1] != const0_rtx)
21805 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
21806 NULL_RTX, 0, OPTAB_WIDEN);
21809 if (lo[1] != const0_rtx)
21810 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
21811 NULL_RTX, 0, OPTAB_WIDEN);
21813 tmp = expand_binop (submode, ior_optab, xor1, xor0,
21814 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse: compare the OR of XORs against zero in the narrower mode.  */
21816 ix86_expand_branch (code, tmp, const0_rtx, label);
21820 /* Otherwise, if we are doing less-than or greater-or-equal-than,
21821 op1 is a constant and the low word is zero, then we can just
21822 examine the high word. Similarly for low word -1 and
21823 less-or-equal-than or greater-than. */
21825 if (CONST_INT_P (hi[1]))
21828 case LT: case LTU: case GE: case GEU:
21829 if (lo[1] == const0_rtx)
21831 ix86_expand_branch (code, hi[0], hi[1], label);
21835 case LE: case LEU: case GT: case GTU:
21836 if (lo[1] == constm1_rtx)
21838 ix86_expand_branch (code, hi[0], hi[1], label);
21846 /* Otherwise, we need two or three jumps. */
21848 label2 = gen_label_rtx ();
21851 code2 = swap_condition (code);
/* The low-word comparison must be unsigned regardless of CODE's sign.  */
21852 code3 = unsigned_condition (code);
21856 case LT: case GT: case LTU: case GTU:
21859 case LE: code1 = LT; code2 = GT; break;
21860 case GE: code1 = GT; code2 = LT; break;
21861 case LEU: code1 = LTU; code2 = GTU; break;
21862 case GEU: code1 = GTU; code2 = LTU; break;
21864 case EQ: code1 = UNKNOWN; code2 = NE; break;
21865 case NE: code2 = UNKNOWN; break;
21868 gcc_unreachable ();
21873 * if (hi(a) < hi(b)) goto true;
21874 * if (hi(a) > hi(b)) goto false;
21875 * if (lo(a) < lo(b)) goto true;
21879 if (code1 != UNKNOWN)
21880 ix86_expand_branch (code1, hi[0], hi[1], label);
21881 if (code2 != UNKNOWN)
21882 ix86_expand_branch (code2, hi[0], hi[1], label2);
21884 ix86_expand_branch (code3, lo[0], lo[1], label);
21886 if (code2 != UNKNOWN)
21887 emit_label (label2);
/* Remaining modes must already be a flags value.  */
21892 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
21897 /* Split branch based on floating point condition. */
/* TARGET1/TARGET2 are the two jump destinations (one may be pc_rtx for
   fall-through); TMP is a scratch passed to the FP compare expander.  */
21899 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
21900 rtx target1, rtx target2, rtx tmp)
/* Canonicalize so the fall-through (pc_rtx) is the second arm, reversing
   the condition with unordered-safe reversal.  */
21905 if (target2 != pc_rtx)
21907 std::swap (target1, target2);
21908 code = reverse_condition_maybe_unordered (code);
21911 condition = ix86_expand_fp_compare (code, op1, op2,
21914 i = emit_jump_insn (gen_rtx_SET
21916 gen_rtx_IF_THEN_ELSE (VOIDmode,
21917 condition, target1, target2)));
/* Preserve the branch probability recorded before the split, if any.  */
21918 if (split_branch_probability >= 0)
21919 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
/* Expand a setcc: store the QImode truth value of CODE(OP0, OP1)
   into DEST.  */
21923 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
21927 gcc_assert (GET_MODE (dest) == QImode);
21929 ret = ix86_expand_compare (code, op0, op1);
/* The comparison rtx is built with VOIDmode; give it the QImode the
   setcc patterns expect.  */
21930 PUT_MODE (ret, QImode);
21931 emit_insn (gen_rtx_SET (dest, ret));
21934 /* Expand comparison setting or clearing carry flag. Return true when
21935 successful and set pop for the operation. */
21937 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
21939 machine_mode mode =
21940 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
21942 /* Do not handle double-mode compares that go through special path. */
21943 if (mode == (TARGET_64BIT ? TImode : DImode))
21946 if (SCALAR_FLOAT_MODE_P (mode))
21949 rtx_insn *compare_seq;
21951 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
21953 /* Shortcut: following common codes never translate
21954 into carry flag compares. */
21955 if (code == EQ || code == NE || code == UNEQ || code == LTGT
21956 || code == ORDERED || code == UNORDERED)
21959 /* These comparisons require zero flag; swap operands so they won't. */
21960 if ((code == GT || code == UNLE || code == LE || code == UNGT)
21961 && !TARGET_IEEE_FP)
21963 std::swap (op0, op1);
21964 code = swap_condition (code);
21967 /* Try to expand the comparison and verify that we end up with
21968 carry flag based comparison. This fails to be true only when
21969 we decide to expand comparison using arithmetic that is not
21970 too common scenario. */
21972 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
21973 compare_seq = get_insns ();
21976 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
21977 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
21978 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
21980 code = GET_CODE (compare_op);
/* Only LTU/GEU are carry-flag comparisons; anything else means failure.  */
21982 if (code != LTU && code != GEU)
21985 emit_insn (compare_seq);
21990 if (!INTEGRAL_MODE_P (mode))
21999 /* Convert a==0 into (unsigned)a<1. */
22002 if (op1 != const0_rtx)
22005 code = (code == EQ ? LTU : GEU);
22008 /* Convert a>b into b<a or a>=b-1. */
22011 if (CONST_INT_P (op1))
22013 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
22014 /* Bail out on overflow. We still can swap operands but that
22015 would force loading of the constant into register. */
22016 if (op1 == const0_rtx
22017 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
22019 code = (code == GTU ? GEU : LTU);
22023 std::swap (op0, op1);
22024 code = (code == GTU ? LTU : GEU);
22028 /* Convert a>=0 into (unsigned)a<0x80000000. */
22031 if (mode == DImode || op1 != const0_rtx)
/* NOTE(review): `1 << (GET_MODE_BITSIZE (mode) - 1)` left-shifts into the
   sign bit of int for SImode -- formally UB in C; verify against upstream
   whether a HOST_WIDE_INT literal was intended.  */
22033 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
22034 code = (code == LT ? GEU : LTU);
22038 if (mode == DImode || op1 != constm1_rtx)
22040 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
22041 code = (code == LE ? GEU : LTU);
22047 /* Swapping operands may cause constant to appear as first operand. */
22048 if (!nonimmediate_operand (op0, VOIDmode))
22050 if (!can_create_pseudo_p ())
22052 op0 = force_reg (mode, op0);
22054 *pop = ix86_expand_compare (code, op0, op1);
22055 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move operands[0] = cond ? operands[2]
   : operands[3].  Tries, in order: carry-flag / sign-bit tricks (sbb,
   store-flag arithmetic, lea) when both arms are constants, masking
   tricks when one arm is a special constant, and finally a real cmov.
   Returns true on success (per the recursion at 22517).
   NOTE(review): many structural lines (braces, else-arms, diff
   computations) are elided from this excerpt.  */
22060 ix86_expand_int_movcc (rtx operands[])
22062 enum rtx_code code = GET_CODE (operands[1]), compare_code;
22063 rtx_insn *compare_seq;
22065 machine_mode mode = GET_MODE (operands[0]);
22066 bool sign_bit_compare_p = false;
22067 rtx op0 = XEXP (operands[1], 0);
22068 rtx op1 = XEXP (operands[1], 1);
22070 if (GET_MODE (op0) == TImode
22071 || (GET_MODE (op0) == DImode
22076 compare_op = ix86_expand_compare (code, op0, op1);
22077 compare_seq = get_insns ();
22080 compare_code = GET_CODE (compare_op);
/* x >= 0 / x < 0 / x > -1 / x <= -1 test only the sign bit.  */
22082 if ((op1 == const0_rtx && (code == GE || code == LT))
22083 || (op1 == constm1_rtx && (code == GT || code == LE)))
22084 sign_bit_compare_p = true;
22086 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
22087 HImode insns, we'd be swallowed in word prefix ops. */
22089 if ((mode != HImode || TARGET_FAST_PREFIX)
22090 && (mode != (TARGET_64BIT ? TImode : DImode))
22091 && CONST_INT_P (operands[2])
22092 && CONST_INT_P (operands[3]))
22094 rtx out = operands[0];
22095 HOST_WIDE_INT ct = INTVAL (operands[2]);
22096 HOST_WIDE_INT cf = INTVAL (operands[3]);
22097 HOST_WIDE_INT diff;
22100 /* Sign bit compares are better done using shifts than we do by using
22102 if (sign_bit_compare_p
22103 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22105 /* Detect overlap between destination and compare sources. */
22108 if (!sign_bit_compare_p)
22111 bool fpcmp = false;
22113 compare_code = GET_CODE (compare_op);
22115 flags = XEXP (compare_op, 0);
22117 if (GET_MODE (flags) == CCFPmode
22118 || GET_MODE (flags) == CCFPUmode)
22122 = ix86_fp_compare_code_to_integer (compare_code);
22125 /* To simplify rest of code, restrict to the GEU case. */
22126 if (compare_code == LTU)
22128 std::swap (ct, cf);
22129 compare_code = reverse_condition (compare_code);
22130 code = reverse_condition (code);
/* FP flags need unordered-safe reversal of the emitted compare op.  */
22135 PUT_CODE (compare_op,
22136 reverse_condition_maybe_unordered
22137 (GET_CODE (compare_op)));
22139 PUT_CODE (compare_op,
22140 reverse_condition (GET_CODE (compare_op)));
22144 if (reg_overlap_mentioned_p (out, op0)
22145 || reg_overlap_mentioned_p (out, op1))
22146 tmp = gen_reg_rtx (mode);
/* Materialize 0 / -1 from the carry flag via sbb-style patterns.  */
22148 if (mode == DImode)
22149 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
22151 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
22152 flags, compare_op));
22156 if (code == GT || code == GE)
22157 code = reverse_condition (code);
22160 std::swap (ct, cf);
22163 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
/* The mask in TMP is now 0 or -1; combine it with ct/cf arithmetic
   (the diff-dependent branches are partially elided here).  */
22176 tmp = expand_simple_binop (mode, PLUS,
22178 copy_rtx (tmp), 1, OPTAB_DIRECT);
22189 tmp = expand_simple_binop (mode, IOR,
22191 copy_rtx (tmp), 1, OPTAB_DIRECT);
22193 else if (diff == -1 && ct)
22203 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
22205 tmp = expand_simple_binop (mode, PLUS,
22206 copy_rtx (tmp), GEN_INT (cf),
22207 copy_rtx (tmp), 1, OPTAB_DIRECT);
22215 * andl cf - ct, dest
22225 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
22228 tmp = expand_simple_binop (mode, AND,
22230 gen_int_mode (cf - ct, mode),
22231 copy_rtx (tmp), 1, OPTAB_DIRECT);
22233 tmp = expand_simple_binop (mode, PLUS,
22234 copy_rtx (tmp), GEN_INT (ct),
22235 copy_rtx (tmp), 1, OPTAB_DIRECT);
22238 if (!rtx_equal_p (tmp, out))
22239 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
22246 machine_mode cmp_mode = GET_MODE (op0);
22247 enum rtx_code new_code;
22249 if (SCALAR_FLOAT_MODE_P (cmp_mode))
22251 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
22253 /* We may be reversing unordered compare to normal compare, that
22254 is not valid in general (we may convert non-trapping condition
22255 to trapping one), however on i386 we currently emit all
22256 comparisons unordered. */
22257 new_code = reverse_condition_maybe_unordered (code);
22260 new_code = ix86_reverse_condition (code, cmp_mode);
22261 if (new_code != UNKNOWN)
22263 std::swap (ct, cf);
22269 compare_code = UNKNOWN;
22270 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
22271 && CONST_INT_P (op1))
22273 if (op1 == const0_rtx
22274 && (code == LT || code == GE))
22275 compare_code = code;
22276 else if (op1 == constm1_rtx)
22280 else if (code == GT)
22285 /* Optimize dest = (op0 < 0) ? -1 : cf. */
22286 if (compare_code != UNKNOWN
22287 && GET_MODE (op0) == GET_MODE (out)
22288 && (cf == -1 || ct == -1))
22290 /* If lea code below could be used, only optimize
22291 if it results in a 2 insn sequence. */
22293 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
22294 || diff == 3 || diff == 5 || diff == 9)
22295 || (compare_code == LT && ct == -1)
22296 || (compare_code == GE && cf == -1))
22299 * notl op1 (if necessary)
22307 code = reverse_condition (code);
22310 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
22312 out = expand_simple_binop (mode, IOR,
22314 out, 1, OPTAB_DIRECT);
22315 if (out != operands[0])
22316 emit_move_insn (operands[0], out);
/* diff of 1/2/4/8/3/5/9 can be synthesized with a single lea.  */
22323 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
22324 || diff == 3 || diff == 5 || diff == 9)
22325 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
22327 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
22333 * lea cf(dest*(ct-cf)),dest
22337 * This also catches the degenerate setcc-only case.
22343 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
22346 /* On x86_64 the lea instruction operates on Pmode, so we need
22347 to get arithmetics done in proper mode to match. */
22349 tmp = copy_rtx (out);
22353 out1 = copy_rtx (out);
22354 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
22358 tmp = gen_rtx_PLUS (mode, tmp, out1);
22364 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
22367 if (!rtx_equal_p (tmp, out))
22370 out = force_operand (tmp, copy_rtx (out));
22372 emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp)));
22374 if (!rtx_equal_p (out, operands[0]))
22375 emit_move_insn (operands[0], copy_rtx (out));
22381 * General case: Jumpful:
22382 * xorl dest,dest cmpl op1, op2
22383 * cmpl op1, op2 movl ct, dest
22384 * setcc dest jcc 1f
22385 * decl dest movl cf, dest
22386 * andl (cf-ct),dest 1:
22389 * Size 20. Size 14.
22391 * This is reasonably steep, but branch mispredict costs are
22392 * high on modern cpus, so consider failing only if optimizing
22396 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
22397 && BRANCH_COST (optimize_insn_for_speed_p (),
22402 machine_mode cmp_mode = GET_MODE (op0);
22403 enum rtx_code new_code;
22405 if (SCALAR_FLOAT_MODE_P (cmp_mode))
22407 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
22409 /* We may be reversing unordered compare to normal compare,
22410 that is not valid in general (we may convert non-trapping
22411 condition to trapping one), however on i386 we currently
22412 emit all comparisons unordered. */
22413 new_code = reverse_condition_maybe_unordered (code);
22417 new_code = ix86_reverse_condition (code, cmp_mode);
22418 if (compare_code != UNKNOWN && new_code != UNKNOWN)
22419 compare_code = reverse_condition (compare_code);
22422 if (new_code != UNKNOWN)
22430 if (compare_code != UNKNOWN)
22432 /* notl op1 (if needed)
22437 For x < 0 (resp. x <= -1) there will be no notl,
22438 so if possible swap the constants to get rid of the
22440 True/false will be -1/0 while code below (store flag
22441 followed by decrement) is 0/-1, so the constants need
22442 to be exchanged once more. */
22444 if (compare_code == GE || !cf)
22446 code = reverse_condition (code);
22450 std::swap (ct, cf);
22452 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
22456 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
22458 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
22460 copy_rtx (out), 1, OPTAB_DIRECT);
22463 out = expand_simple_binop (mode, AND, copy_rtx (out),
22464 gen_int_mode (cf - ct, mode),
22465 copy_rtx (out), 1, OPTAB_DIRECT);
22467 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
22468 copy_rtx (out), 1, OPTAB_DIRECT);
22469 if (!rtx_equal_p (out, operands[0]))
22470 emit_move_insn (operands[0], copy_rtx (out));
22476 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
22478 /* Try a few things more with specific constants and a variable. */
22481 rtx var, orig_out, out, tmp;
22483 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
22486 /* If one of the two operands is an interesting constant, load a
22487 constant with the above and mask it in with a logical operation. */
22489 if (CONST_INT_P (operands[2]))
22492 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
22493 operands[3] = constm1_rtx, op = and_optab;
22494 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
22495 operands[3] = const0_rtx, op = ior_optab;
22499 else if (CONST_INT_P (operands[3]))
22502 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
22503 operands[2] = constm1_rtx, op = and_optab;
/* NOTE(review): by symmetry with the CONST_INT_P (operands[2]) branch
   above, this guard would be expected to test operands[2] != const0_rtx;
   as written it tests operands[3], which it just found equal to -1.
   Verify against upstream GCC before relying on this path.  */
22504 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
22505 operands[2] = const0_rtx, op = ior_optab;
22512 orig_out = operands[0];
22513 tmp = gen_reg_rtx (mode);
22516 /* Recurse to get the constant loaded. */
22517 if (!ix86_expand_int_movcc (operands))
22520 /* Mask in the interesting variable. */
22521 out = expand_binop (mode, op, var, tmp, orig_out, 0,
22523 if (!rtx_equal_p (out, orig_out))
22524 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
22530 * For comparison with above,
/* Final path: a real cmov; its operands must be register/memory.  */
22540 if (! nonimmediate_operand (operands[2], mode))
22541 operands[2] = force_reg (mode, operands[2]);
22542 if (! nonimmediate_operand (operands[3], mode))
22543 operands[3] = force_reg (mode, operands[3]);
22545 if (! register_operand (operands[2], VOIDmode)
22547 || ! register_operand (operands[3], VOIDmode)))
22548 operands[2] = force_reg (mode, operands[2]);
22551 && ! register_operand (operands[3], VOIDmode))
22552 operands[3] = force_reg (mode, operands[3]);
22554 emit_insn (compare_seq);
22555 emit_insn (gen_rtx_SET (operands[0],
22556 gen_rtx_IF_THEN_ELSE (mode,
22557 compare_op, operands[2],
22562 /* Swap, force into registers, or otherwise massage the two operands
22563 to an sse comparison with a mask result. Thus we differ a bit from
22564 ix86_prepare_fp_compare_args which expects to produce a flags result.
22566 The DEST operand exists to help determine whether to commute commutative
22567 operators. The POP0/POP1 operands are updated in place. The new
22568 comparison code is returned, or UNKNOWN if not implementable. */
22570 static enum rtx_code
22571 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
22572 rtx *pop0, rtx *pop1)
/* NOTE(review): the switch over CODE that these comment/statement
   fragments belong to is elided from this excerpt.  */
22578 /* AVX supports all the needed comparisons. */
22581 /* We have no LTGT as an operator. We could implement it with
22582 NE & ORDERED, but this requires an extra temporary. It's
22583 not clear that it's worth it. */
22590 /* These are supported directly. */
22597 /* AVX has 3 operand comparisons, no need to swap anything. */
22600 /* For commutative operators, try to canonicalize the destination
22601 operand to be first in the comparison - this helps reload to
22602 avoid extra moves. */
22603 if (!dest || !rtx_equal_p (dest, *pop1))
22611 /* These are not supported directly before AVX, and furthermore
22612 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
22613 comparison operands to transform into something that is
22615 std::swap (*pop0, *pop1);
22616 code = swap_condition (code);
22620 gcc_unreachable ();
22626 /* Detect conditional moves that exactly match min/max operational
22627 semantics. Note that this is IEEE safe, as long as we don't
22628 interchange the operands.
22630 Returns FALSE if this conditional move doesn't match a MIN/MAX,
22631 and TRUE if the operation is successful and instructions are emitted. */
22634 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
22635 rtx cmp_op1, rtx if_true, rtx if_false)
/* UNGE is handled by swapping arms so it matches the LT-shaped pattern.  */
22643 else if (code == UNGE)
22644 std::swap (if_true, if_false);
/* cmp_op0 < cmp_op1 ? cmp_op0 : cmp_op1 is a MIN; the mirrored pairing
   is a MAX (is_min assignment lines elided in this excerpt).  */
22648 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
22650 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
22655 mode = GET_MODE (dest);
22657 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
22658 but MODE may be a vector mode and thus not appropriate. */
22659 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* Strict IEEE semantics: use the UNSPEC min/max which preserves the
   operand-order-dependent NaN/signed-zero behavior of minps/maxps.  */
22661 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
22664 if_true = force_reg (mode, if_true);
22665 v = gen_rtvec (2, if_true, if_false);
22666 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Relaxed math: plain SMIN/SMAX rtl is sufficient.  */
22670 code = is_min ? SMIN : SMAX;
22671 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
22674 emit_insn (gen_rtx_SET (dest, tmp));
22678 /* Expand an sse vector comparison. Return the register with the result. */
22681 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
22682 rtx op_true, rtx op_false)
22684 machine_mode mode = GET_MODE (dest);
22685 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
22687 /* In general case result of comparison can differ from operands' type. */
22688 machine_mode cmp_mode;
22690 /* In AVX512F the result of comparison is an integer mask. */
22691 bool maskcmp = false;
/* 64-byte (512-bit) operands: result is a scalar mask of one bit per
   element (maskcmp assignment line elided in this excerpt).  */
22694 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
22696 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
22697 gcc_assert (cmp_mode != BLKmode);
22702 cmp_mode = cmp_ops_mode;
22705 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
22706 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
22707 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
/* Avoid clobbering a dest that overlaps the blend arms: use a fresh reg.  */
22710 || (op_true && reg_overlap_mentioned_p (dest, op_true))
22711 || (op_false && reg_overlap_mentioned_p (dest, op_false)))
22712 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
22714 /* Compare patterns for int modes are unspec in AVX512F only. */
22715 if (maskcmp && (code == GT || code == EQ))
22717 rtx (*gen)(rtx, rtx, rtx);
22719 switch (cmp_ops_mode)
/* Byte/word element sizes additionally require AVX512BW.  */
22722 gcc_assert (TARGET_AVX512BW);
22723 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
22726 gcc_assert (TARGET_AVX512BW);
22727 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
22730 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
22733 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
22741 emit_insn (gen (dest, cmp_op0, cmp_op1));
/* Generic path: a comparison rtx the vector compare patterns match.  */
22745 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
22747 if (cmp_mode != mode && !maskcmp)
22749 x = force_reg (cmp_ops_mode, x);
22750 convert_move (dest, x, false);
22753 emit_insn (gen_rtx_SET (dest, x));
22758 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
22759 operations. This is used for both scalar and vector conditional moves. */
22762 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
22764 machine_mode mode = GET_MODE (dest);
22765 machine_mode cmpmode = GET_MODE (cmp);
22767 /* In AVX512F the result of comparison is an integer mask. */
22768 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
22772 /* If we have an integer mask and FP value then we need
22773 to cast mask to FP mode. */
22774 if (mode != cmpmode && VECTOR_MODE_P (cmpmode))
22776 cmp = force_reg (cmpmode, cmp);
22777 cmp = gen_rtx_SUBREG (mode, cmp, 0);
/* Special cases avoiding any blend: the mask itself IS the answer when
   selecting between all-ones and zero...  */
22780 if (vector_all_ones_operand (op_true, mode)
22781 && rtx_equal_p (op_false, CONST0_RTX (mode))
22784 emit_insn (gen_rtx_SET (dest, cmp));
/* ...a single AND when the false arm is zero...  */
22786 else if (op_false == CONST0_RTX (mode)
22789 op_true = force_reg (mode, op_true);
22790 x = gen_rtx_AND (mode, cmp, op_true);
22791 emit_insn (gen_rtx_SET (dest, x));
/* ...an ANDN when the true arm is zero...  */
22793 else if (op_true == CONST0_RTX (mode)
22796 op_false = force_reg (mode, op_false);
22797 x = gen_rtx_NOT (mode, cmp);
22798 x = gen_rtx_AND (mode, x, op_false);
22799 emit_insn (gen_rtx_SET (dest, x));
/* ...and an OR when the true arm is all-ones (integer modes).  */
22801 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
22804 op_false = force_reg (mode, op_false);
22805 x = gen_rtx_IOR (mode, cmp, op_false);
22806 emit_insn (gen_rtx_SET (dest, x));
/* XOP provides a true vector cmov (vpcmov) taking the mask directly.  */
22808 else if (TARGET_XOP
22811 op_true = force_reg (mode, op_true);
22813 if (!nonimmediate_operand (op_false, mode))
22814 op_false = force_reg (mode, op_false);
22816 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
/* Otherwise pick a blend insn per mode; D may be a lowpart-retyped temp
   when the element type does not match the blend pattern's mode.  */
22822 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
22825 if (!nonimmediate_operand (op_true, mode))
22826 op_true = force_reg (mode, op_true);
22828 op_false = force_reg (mode, op_false);
22834 gen = gen_sse4_1_blendvps;
22838 gen = gen_sse4_1_blendvpd;
22846 gen = gen_sse4_1_pblendvb;
22847 if (mode != V16QImode)
22848 d = gen_reg_rtx (V16QImode);
22849 op_false = gen_lowpart (V16QImode, op_false);
22850 op_true = gen_lowpart (V16QImode, op_true);
22851 cmp = gen_lowpart (V16QImode, cmp);
22856 gen = gen_avx_blendvps256;
22860 gen = gen_avx_blendvpd256;
22868 gen = gen_avx2_pblendvb;
22869 if (mode != V32QImode)
22870 d = gen_reg_rtx (V32QImode);
22871 op_false = gen_lowpart (V32QImode, op_false);
22872 op_true = gen_lowpart (V32QImode, op_true);
22873 cmp = gen_lowpart (V32QImode, cmp);
/* AVX-512 mask-register blends for 512-bit modes.  */
22878 gen = gen_avx512bw_blendmv64qi;
22881 gen = gen_avx512bw_blendmv32hi;
22884 gen = gen_avx512f_blendmv16si;
22887 gen = gen_avx512f_blendmv8di;
22890 gen = gen_avx512f_blendmv8df;
22893 gen = gen_avx512f_blendmv16sf;
22902 emit_insn (gen (d, op_false, op_true, cmp));
22904 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
/* Fallback when no blend insn exists: (op_true & cmp) | (~cmp & op_false).  */
22908 op_true = force_reg (mode, op_true);
22910 t2 = gen_reg_rtx (mode);
22912 t3 = gen_reg_rtx (mode);
22916 x = gen_rtx_AND (mode, op_true, cmp);
22917 emit_insn (gen_rtx_SET (t2, x));
22919 x = gen_rtx_NOT (mode, cmp);
22920 x = gen_rtx_AND (mode, x, op_false);
22921 emit_insn (gen_rtx_SET (t3, x));
22923 x = gen_rtx_IOR (mode, t3, t2);
22924 emit_insn (gen_rtx_SET (dest, x));
22929 /* Expand a floating-point conditional move. Return true if successful. */
22932 ix86_expand_fp_movcc (rtx operands[])
22934 machine_mode mode = GET_MODE (operands[0]);
22935 enum rtx_code code = GET_CODE (operands[1]);
22936 rtx tmp, compare_op;
22937 rtx op0 = XEXP (operands[1], 0);
22938 rtx op1 = XEXP (operands[1], 1);
/* SSE path: no cmov exists for SSE registers, so lower to sse compare
   mask + min/max or blend via the sse helpers.  */
22940 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
22942 machine_mode cmode;
22944 /* Since we've no cmove for sse registers, don't force bad register
22945 allocation just to gain access to it. Deny movcc when the
22946 comparison mode doesn't match the move mode. */
22947 cmode = GET_MODE (op0);
22948 if (cmode == VOIDmode)
22949 cmode = GET_MODE (op1);
22953 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
22954 if (code == UNKNOWN)
/* Prefer the min/max shortcut when the cmov matches exactly.  */
22957 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
22958 operands[2], operands[3]))
22961 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
22962 operands[2], operands[3]);
22963 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
22967 if (GET_MODE (op0) == TImode
22968 || (GET_MODE (op0) == DImode
22972 /* The floating point conditional move instructions don't directly
22973 support conditions resulting from a signed integer comparison. */
22975 compare_op = ix86_expand_compare (code, op0, op1);
22976 if (!fcmov_comparison_operator (compare_op, VOIDmode))
/* Materialize the condition with setcc, then fcmov on (tmp != 0).  */
22978 tmp = gen_reg_rtx (QImode);
22979 ix86_expand_setcc (tmp, code, op0, op1);
22981 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
22984 emit_insn (gen_rtx_SET (operands[0],
22985 gen_rtx_IF_THEN_ELSE (mode, compare_op,
22986 operands[2], operands[3])));
22991 /* Helper for ix86_cmp_code_to_pcmp_immediate for int modes. */
/* NOTE(review): the body (a switch mapping rtx comparison codes to the
   vpcmp immediate encoding) is elided from this excerpt.  */
22994 ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code)
23015 gcc_unreachable ();
23019 /* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes. */
/* NOTE(review): the body (a switch mapping FP comparison codes to the
   vcmp immediate encoding) is elided from this excerpt.  */
23022 ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code)
23039 gcc_unreachable ();
23043 /* Return immediate value to be used in UNSPEC_PCMP
23044 for comparison CODE in MODE. */
23047 ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode)
/* FP and integer vector compares use different immediate encodings;
   dispatch on the mode class.  */
23049 if (FLOAT_MODE_P (mode))
23050 return ix86_fp_cmp_code_to_pcmp_immediate (code);
23051 return ix86_int_cmp_code_to_pcmp_immediate (code);
23054 /* Expand AVX-512 vector comparison. */
/* operands[0] is the mask-register destination, operands[1] the
   comparison RTX, operands[2] (and presumably operands[3], inside the
   elided gen_rtvec continuation) the vectors being compared.  Emits a
   single UNSPEC_PCMP/UNSPEC_UNSIGNED_PCMP set of the mask register.  */
23057 ix86_expand_mask_vec_cmp (rtx operands[])
23059 machine_mode mask_mode = GET_MODE (operands[0]);
23060 machine_mode cmp_mode = GET_MODE (operands[2]);
23061 enum rtx_code code = GET_CODE (operands[1]);
/* Translate CODE into the VPCMP immediate for the element mode.  */
23062 rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode));
/* Unsigned comparison codes select the unsigned compare unspec; the
   selecting switch is elided in this listing.  */
23072 unspec_code = UNSPEC_UNSIGNED_PCMP;
23076 unspec_code = UNSPEC_PCMP;
23079 unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2],
23082 emit_insn (gen_rtx_SET (operands[0], unspec));
23087 /* Expand fp vector comparison. */
/* operands[0] = dest, operands[1] = comparison, operands[2]/[3] = the
   compared vectors.  Ordered/unordered codes with no direct SSE encoding
   are split into two compares combined with a logic op.  */
23090 ix86_expand_fp_vec_cmp (rtx operands[])
23092 enum rtx_code code = GET_CODE (operands[1]);
23095 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
23096 &operands[2], &operands[3]);
23097 if (code == UNKNOWN)
/* NOTE(review): the switch cases selecting these two-compare splits are
   partially elided; the visible pairs are ORDERED+NE and UNORDERED+EQ.  */
23100 switch (GET_CODE (operands[1]))
23103 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[2],
23104 operands[3], NULL, NULL);
23105 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[2],
23106 operands[3], NULL, NULL);
23110 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[2],
23111 operands[3], NULL, NULL);
23112 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[2],
23113 operands[3], NULL, NULL);
23117 gcc_unreachable ();
/* Combine the two partial results (CODE is AND or IOR here, set in the
   elided case bodies above).  */
23119 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
/* Directly-encodable comparison: one SSE compare suffices.  */
23123 cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3],
23124 operands[1], operands[2]);
23126 if (operands[0] != cmp)
23127 emit_move_insn (operands[0], cmp);
/* Expand an integer vector comparison DEST = (COP0 CODE COP1), with
   OP_TRUE/OP_FALSE as the eventual select arms.  *NEGATE is set when the
   emitted comparison computes the inverse and the caller must swap the
   arms.  The comparison may be done in a different (same-size) mode than
   DEST's mode; the result is lowpart-punned back.  */
23133 ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
23134 rtx op_true, rtx op_false, bool *negate)
23136 machine_mode data_mode = GET_MODE (dest);
23137 machine_mode mode = GET_MODE (cop0);
23142 /* XOP supports all of the comparisons on all 128-bit vector int types. */
23144 && (mode == V16QImode || mode == V8HImode
23145 || mode == V4SImode || mode == V2DImode))
23149 /* Canonicalize the comparison to EQ, GT, GTU. */
/* The case labels driving these reversals/swaps are elided in this
   listing; canonicalization uses reverse_condition and swap_condition.  */
23160 code = reverse_condition (code);
23166 code = reverse_condition (code);
23172 std::swap (cop0, cop1);
23173 code = swap_condition (code);
23177 gcc_unreachable ();
23180 /* Only SSE4.1/SSE4.2 supports V2DImode. */
23181 if (mode == V2DImode)
23186 /* SSE4.1 supports EQ. */
23187 if (!TARGET_SSE4_1)
23193 /* SSE4.2 supports GT/GTU. */
23194 if (!TARGET_SSE4_2)
23199 gcc_unreachable ();
23203 /* Unsigned parallel compare is not supported by the hardware.
23204 Play some tricks to turn this into a signed comparison
/* (continuation of the comment is elided).  */
23208 cop0 = force_reg (mode, cop0);
/* Strategy 1: bias both operands by the sign bit so a signed GT gives
   the unsigned result.  */
23220 rtx (*gen_sub3) (rtx, rtx, rtx);
23224 case V16SImode: gen_sub3 = gen_subv16si3; break;
23225 case V8DImode: gen_sub3 = gen_subv8di3; break;
23226 case V8SImode: gen_sub3 = gen_subv8si3; break;
23227 case V4DImode: gen_sub3 = gen_subv4di3; break;
23228 case V4SImode: gen_sub3 = gen_subv4si3; break;
23229 case V2DImode: gen_sub3 = gen_subv2di3; break;
23231 gcc_unreachable ();
23233 /* Subtract (-(INT MAX) - 1) from both operands to make
/* (continuation elided: "them signed").  */
23235 mask = ix86_build_signbit_mask (mode, true, false);
23236 t1 = gen_reg_rtx (mode);
23237 emit_insn (gen_sub3 (t1, cop0, mask));
23239 t2 = gen_reg_rtx (mode);
23240 emit_insn (gen_sub3 (t2, cop1, mask));
/* Strategy 2: unsigned saturating subtract, then compare against zero;
   flips the NEGATE flag since it computes the inverted condition.  */
23254 /* Perform a parallel unsigned saturating subtraction. */
23255 x = gen_reg_rtx (mode);
23256 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0,
23260 cop1 = CONST0_RTX (mode);
23262 *negate = !*negate;
23266 gcc_unreachable ();
/* When negating, the true/false arms are exchanged up front.  */
23272 std::swap (op_true, op_false);
23274 /* Allow the comparison to be done in one mode, but the movcc to
23275 happen in another mode. */
23276 if (data_mode == mode)
23278 x = ix86_expand_sse_cmp (dest, code, cop0, cop1,
23279 op_true, op_false);
23283 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
23284 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
23285 op_true, op_false);
23286 if (GET_MODE (x) == mode)
23287 x = gen_lowpart (data_mode, x);
23293 /* Expand integer vector comparison. */
/* operands[0] = dest, operands[1] = comparison, operands[2]/[3] = the
   compared vectors.  If the helper signalled negation, invert its
   result by comparing against zero with EQ.  */
23296 ix86_expand_int_vec_cmp (rtx operands[])
23298 rtx_code code = GET_CODE (operands[1]);
23299 bool negate = false;
23300 rtx cmp = ix86_expand_int_sse_cmp (operands[0], code, operands[2],
23301 operands[3], NULL, NULL, &negate);
/* Undo the negation: (cmp == 0) gives the originally requested mask.  */
23307 cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp,
23308 CONST0_RTX (GET_MODE (cmp)),
23309 NULL, NULL, &negate);
/* EQ against zero must never itself require negation.  */
23311 gcc_assert (!negate);
23313 if (operands[0] != cmp)
23314 emit_move_insn (operands[0], cmp);
23319 /* Expand a floating-point vector conditional move; a vcond operation
23320 rather than a movcc operation. */
/* operands[0] = dest, operands[1]/[2] = true/false arms,
   operands[3] = comparison, operands[4]/[5] = its operands.  */
23323 ix86_expand_fp_vcond (rtx operands[])
23325 enum rtx_code code = GET_CODE (operands[3]);
23328 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
23329 &operands[4], &operands[5]);
23330 if (code == UNKNOWN)
/* Codes with no direct SSE encoding are split into two compares joined
   by a logic op (same scheme as ix86_expand_fp_vec_cmp); the case
   labels are elided in this listing.  */
23333 switch (GET_CODE (operands[3]))
23336 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
23337 operands[5], operands[0], operands[0]);
23338 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
23339 operands[5], operands[1], operands[2]);
23343 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
23344 operands[5], operands[0], operands[0]);
23345 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
23346 operands[5], operands[1], operands[2]);
23350 gcc_unreachable ();
23352 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
23354 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
/* Directly-encodable comparison: try min/max first, else compare+blend.  */
23358 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
23359 operands[5], operands[1], operands[2]))
23362 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
23363 operands[1], operands[2]);
23364 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
23368 /* Expand a signed/unsigned integral vector conditional move. */
/* operands[0] = dest, operands[1]/[2] = true/false arms,
   operands[3] = comparison, operands[4]/[5] = its operands.  */
23371 ix86_expand_int_vcond (rtx operands[])
23373 machine_mode data_mode = GET_MODE (operands[0]);
23374 machine_mode mode = GET_MODE (operands[4]);
23375 enum rtx_code code = GET_CODE (operands[3]);
23376 bool negate = false;
23379 cop0 = operands[4];
23380 cop1 = operands[5];
23382 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
23383 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
23384 if ((code == LT || code == GE)
23385 && data_mode == mode
23386 && cop1 == CONST0_RTX (mode)
23387 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
23388 && GET_MODE_UNIT_SIZE (data_mode) > 1
23389 && GET_MODE_UNIT_SIZE (data_mode) <= 8
23390 && (GET_MODE_SIZE (data_mode) == 16
23391 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
/* NEGOP is the non-zero arm; SHIFT extracts the sign bit.  */
23393 rtx negop = operands[2 - (code == LT)];
23394 int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1;
23395 if (negop == CONST1_RTX (data_mode))
/* x < 0 ? 1 : 0 --> logical shift right by the sign-bit position.  */
23397 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
23398 operands[0], 1, OPTAB_DIRECT);
23399 if (res != operands[0])
23400 emit_move_insn (operands[0], res);
23403 else if (GET_MODE_INNER (data_mode) != DImode
23404 && vector_all_ones_operand (negop, data_mode))
/* x < 0 ? -1 : 0 --> arithmetic shift right smears the sign bit.  */
23406 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
23407 operands[0], 0, OPTAB_DIRECT);
23408 if (res != operands[0])
23409 emit_move_insn (operands[0], res);
/* General case: legitimize operands, then compare and blend.  */
23414 if (!nonimmediate_operand (cop1, mode))
23415 cop1 = force_reg (mode, cop1);
23416 if (!general_operand (operands[1], data_mode))
23417 operands[1] = force_reg (data_mode, operands[1]);
23418 if (!general_operand (operands[2], data_mode))
23419 operands[2] = force_reg (data_mode, operands[2]);
23421 x = ix86_expand_int_sse_cmp (operands[0], code, cop0, cop1,
23422 operands[1], operands[2], &negate);
/* NEGATE swaps the true/false arms of the final blend.  */
23427 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
23428 operands[2-negate]);
23432 /* AVX512F does support 64-byte integer vector operations,
23433 thus the longest vector we are faced with is V64QImode. */
23434 #define MAX_VECT_LEN 64
/* Descriptor for a (constant) vector permutation to be expanded.  */
23436 struct expand_vec_perm_d
/* Destination and the two source operands of the permutation.  */
23438 rtx target, op0, op1;
/* Element selector: perm[i] is the source lane for result lane i.  */
23439 unsigned char perm[MAX_VECT_LEN];
/* Vector mode of the operands.  */
23440 machine_mode vmode;
/* Number of elements actually used in PERM.  */
23441 unsigned char nelt;
/* True when op0 == op1 (single-input shuffle).  */
23442 bool one_operand_p;
/* Try to expand a variable permutation with a single AVX-512 VPERMI2
   (two-source, index-in-register) instruction.  Presumably returns
   whether it succeeded -- the return statements are elided in this
   listing; confirm against the callers, which test the result.  */
23447 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
23448 struct expand_vec_perm_d *d)
23450 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
23451 expander, so args are either in d, or in op0, op1 etc. */
23452 machine_mode mode = GET_MODE (d ? d->op0 : op0);
/* For FP modes the selector vector uses the same-width integer mode.  */
23453 machine_mode maskmode = mode;
23454 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
/* Select the mode-specific VPERMI2 generator, gated on the ISA subset
   that provides it (the switch's case labels are elided here).  */
23459 if (TARGET_AVX512VL && TARGET_AVX512BW)
23460 gen = gen_avx512vl_vpermi2varv8hi3;
23463 if (TARGET_AVX512VL && TARGET_AVX512BW)
23464 gen = gen_avx512vl_vpermi2varv16hi3;
23467 if (TARGET_AVX512VBMI)
23468 gen = gen_avx512bw_vpermi2varv64qi3;
23471 if (TARGET_AVX512BW)
23472 gen = gen_avx512bw_vpermi2varv32hi3;
23475 if (TARGET_AVX512VL)
23476 gen = gen_avx512vl_vpermi2varv4si3;
23479 if (TARGET_AVX512VL)
23480 gen = gen_avx512vl_vpermi2varv8si3;
23483 if (TARGET_AVX512F)
23484 gen = gen_avx512f_vpermi2varv16si3;
23487 if (TARGET_AVX512VL)
23489 gen = gen_avx512vl_vpermi2varv4sf3;
23490 maskmode = V4SImode;
23494 if (TARGET_AVX512VL)
23496 gen = gen_avx512vl_vpermi2varv8sf3;
23497 maskmode = V8SImode;
23501 if (TARGET_AVX512F)
23503 gen = gen_avx512f_vpermi2varv16sf3;
23504 maskmode = V16SImode;
23508 if (TARGET_AVX512VL)
23509 gen = gen_avx512vl_vpermi2varv2di3;
23512 if (TARGET_AVX512VL)
23513 gen = gen_avx512vl_vpermi2varv4di3;
23516 if (TARGET_AVX512F)
23517 gen = gen_avx512f_vpermi2varv8di3;
23520 if (TARGET_AVX512VL)
23522 gen = gen_avx512vl_vpermi2varv2df3;
23523 maskmode = V2DImode;
23527 if (TARGET_AVX512VL)
23529 gen = gen_avx512vl_vpermi2varv4df3;
23530 maskmode = V4DImode;
23534 if (TARGET_AVX512F)
23536 gen = gen_avx512f_vpermi2varv8df3;
23537 maskmode = V8DImode;
23547 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
23548 expander, so args are either in d, or in op0, op1 etc. */
23552 target = d->target;
/* Const expander: build the selector CONST_VECTOR from d->perm.  */
23555 for (int i = 0; i < d->nelt; ++i)
23556 vec[i] = GEN_INT (d->perm[i]);
23557 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
23560 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
23564 /* Expand a variable vector permutation. */
/* operands[0] = target, operands[1]/[2] = the two sources,
   operands[3] = the (possibly non-constant) selector vector.
   Tries VPERMI2 first, then AVX2 lane tricks, then 128-bit
   PSHUFB/VPPERM sequences.  */
23567 ix86_expand_vec_perm (rtx operands[])
23569 rtx target = operands[0];
23570 rtx op0 = operands[1];
23571 rtx op1 = operands[2];
23572 rtx mask = operands[3];
23573 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
23574 machine_mode mode = GET_MODE (op0);
23575 machine_mode maskmode = GET_MODE (mask);
23577 bool one_operand_shuffle = rtx_equal_p (op0, op1);
23579 /* Number of elements in the vector. */
23580 w = GET_MODE_NUNITS (mode);
23581 e = GET_MODE_UNIT_SIZE (mode);
23582 gcc_assert (w <= 64);
/* A single VPERMI2 handles everything when the ISA allows it.  */
23584 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
23589 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
23591 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
23592 an constant shuffle operand. With a tiny bit of effort we can
23593 use VPERMD instead. A re-interpretation stall for V4DFmode is
23594 unfortunate but there's no avoiding it.
23595 Similarly for V16HImode we don't have instructions for variable
23596 shuffling, while for V32QImode we can use after preparing suitable
23597 masks vpshufb; vpshufb; vpermq; vpor. */
23599 if (mode == V16HImode)
23601 maskmode = mode = V32QImode;
23607 maskmode = mode = V8SImode;
23611 t1 = gen_reg_rtx (maskmode);
23613 /* Replicate the low bits of the V4DImode mask into V8SImode:
23615 t1 = { A A B B C C D D }. */
23616 for (i = 0; i < w / 2; ++i)
23617 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
23618 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
23619 vt = force_reg (maskmode, vt);
23620 mask = gen_lowpart (maskmode, mask);
23621 if (maskmode == V8SImode)
23622 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
23624 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
23626 /* Multiply the shuffle indicies by two. */
23627 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
23630 /* Add one to the odd shuffle indicies:
23631 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
23632 for (i = 0; i < w / 2; ++i)
23634 vec[i * 2] = const0_rtx;
23635 vec[i * 2 + 1] = const1_rtx;
23637 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
23638 vt = validize_mem (force_const_mem (maskmode, vt));
23639 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
23642 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
23643 operands[3] = mask = t1;
23644 target = gen_reg_rtx (mode);
23645 op0 = gen_lowpart (mode, op0);
23646 op1 = gen_lowpart (mode, op1);
/* AVX2 V8SImode: VPERMD handles variable selectors directly.  */
23652 /* The VPERMD and VPERMPS instructions already properly ignore
23653 the high bits of the shuffle elements. No need for us to
23654 perform an AND ourselves. */
23655 if (one_operand_shuffle)
23657 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
23658 if (target != operands[0])
23659 emit_move_insn (operands[0],
23660 gen_lowpart (GET_MODE (operands[0]), target));
/* Two-operand case: permute both sources, merge later (merge code is
   elided between the visible sections).  */
23664 t1 = gen_reg_rtx (V8SImode);
23665 t2 = gen_reg_rtx (V8SImode);
23666 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
23667 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
/* V8SFmode: same scheme via VPERMPS.  */
23673 mask = gen_lowpart (V8SImode, mask);
23674 if (one_operand_shuffle)
23675 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
23678 t1 = gen_reg_rtx (V8SFmode);
23679 t2 = gen_reg_rtx (V8SFmode);
23680 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
23681 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
23687 /* By combining the two 128-bit input vectors into one 256-bit
23688 input vector, we can use VPERMD and VPERMPS for the full
23689 two-operand shuffle. */
23690 t1 = gen_reg_rtx (V8SImode);
23691 t2 = gen_reg_rtx (V8SImode);
23692 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
23693 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
23694 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
23695 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
/* V4SFmode analogue of the concat trick.  */
23699 t1 = gen_reg_rtx (V8SFmode);
23700 t2 = gen_reg_rtx (V8SImode);
23701 mask = gen_lowpart (V4SImode, mask);
23702 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
23703 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
23704 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
23705 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
/* V32QImode: build lane-aware PSHUFB masks, shuffle, then OR.  */
23709 t1 = gen_reg_rtx (V32QImode);
23710 t2 = gen_reg_rtx (V32QImode);
23711 t3 = gen_reg_rtx (V32QImode);
23712 vt2 = GEN_INT (-128);
23713 for (i = 0; i < 32; i++)
23715 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
23716 vt = force_reg (V32QImode, vt);
23717 for (i = 0; i < 32; i++)
23718 vec[i] = i < 16 ? vt2 : const0_rtx;
23719 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
23720 vt2 = force_reg (V32QImode, vt2);
23721 /* From mask create two adjusted masks, which contain the same
23722 bits as mask in the low 7 bits of each vector element.
23723 The first mask will have the most significant bit clear
23724 if it requests element from the same 128-bit lane
23725 and MSB set if it requests element from the other 128-bit lane.
23726 The second mask will have the opposite values of the MSB,
23727 and additionally will have its 128-bit lanes swapped.
23728 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
23729 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
23730 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
23731 stands for other 12 bytes. */
23732 /* The bit whether element is from the same lane or the other
23733 lane is bit 4, so shift it up by 3 to the MSB position. */
23734 t5 = gen_reg_rtx (V4DImode);
23735 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
23737 /* Clear MSB bits from the mask just in case it had them set. */
23738 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
23739 /* After this t1 will have MSB set for elements from other lane. */
23740 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
23741 /* Clear bits other than MSB. */
23742 emit_insn (gen_andv32qi3 (t1, t1, vt));
23743 /* Or in the lower bits from mask into t3. */
23744 emit_insn (gen_iorv32qi3 (t3, t1, t2));
23745 /* And invert MSB bits in t1, so MSB is set for elements from the same
23747 emit_insn (gen_xorv32qi3 (t1, t1, vt));
23748 /* Swap 128-bit lanes in t3. */
23749 t6 = gen_reg_rtx (V4DImode);
23750 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
23751 const2_rtx, GEN_INT (3),
23752 const0_rtx, const1_rtx));
23753 /* And or in the lower bits from mask into t1. */
23754 emit_insn (gen_iorv32qi3 (t1, t1, t2));
23755 if (one_operand_shuffle)
23757 /* Each of these shuffles will put 0s in places where
23758 element from the other 128-bit lane is needed, otherwise
23759 will shuffle in the requested value. */
23760 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
23761 gen_lowpart (V32QImode, t6)));
23762 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
23763 /* For t3 the 128-bit lanes are swapped again. */
23764 t7 = gen_reg_rtx (V4DImode);
23765 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
23766 const2_rtx, GEN_INT (3),
23767 const0_rtx, const1_rtx));
23768 /* And oring both together leads to the result. */
23769 emit_insn (gen_iorv32qi3 (target, t1,
23770 gen_lowpart (V32QImode, t7)));
23771 if (target != operands[0])
23772 emit_move_insn (operands[0],
23773 gen_lowpart (GET_MODE (operands[0]), target));
23777 t4 = gen_reg_rtx (V32QImode);
23778 /* Similarly to the above one_operand_shuffle code,
23779 just for repeated twice for each operand. merge_two:
23780 code will merge the two results together. */
23781 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
23782 gen_lowpart (V32QImode, t6)));
23783 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
23784 gen_lowpart (V32QImode, t6)));
23785 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
23786 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
23787 t7 = gen_reg_rtx (V4DImode);
23788 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
23789 const2_rtx, GEN_INT (3),
23790 const0_rtx, const1_rtx));
23791 t8 = gen_reg_rtx (V4DImode);
23792 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
23793 const2_rtx, GEN_INT (3),
23794 const0_rtx, const1_rtx));
23795 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
23796 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
/* 128-bit (SSE) fall-through path.  */
23802 gcc_assert (GET_MODE_SIZE (mode) <= 16);
23809 /* The XOP VPPERM insn supports three inputs. By ignoring the
23810 one_operand_shuffle special case, we avoid creating another
23811 set of constant vectors in memory. */
23812 one_operand_shuffle = false;
23814 /* mask = mask & {2*w-1, ...} */
23815 vt = GEN_INT (2*w - 1);
23819 /* mask = mask & {w-1, ...} */
23820 vt = GEN_INT (w - 1);
23823 for (i = 0; i < w; i++)
23825 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
23826 mask = expand_simple_binop (maskmode, AND, mask, vt,
23827 NULL_RTX, 0, OPTAB_DIRECT);
23829 /* For non-QImode operations, convert the word permutation control
23830 into a byte permutation control. */
23831 if (mode != V16QImode)
23833 mask = expand_simple_binop (maskmode, ASHIFT, mask,
23834 GEN_INT (exact_log2 (e)),
23835 NULL_RTX, 0, OPTAB_DIRECT);
23837 /* Convert mask to vector of chars. */
23838 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
23840 /* Replicate each of the input bytes into byte positions:
23841 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
23842 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
23843 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
23844 for (i = 0; i < 16; ++i)
23845 vec[i] = GEN_INT (i/e * e);
23846 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
23847 vt = validize_mem (force_const_mem (V16QImode, vt));
23849 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
23851 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
23853 /* Convert it into the byte positions by doing
23854 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
23855 for (i = 0; i < 16; ++i)
23856 vec[i] = GEN_INT (i % e);
23857 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
23858 vt = validize_mem (force_const_mem (V16QImode, vt));
23859 emit_insn (gen_addv16qi3 (mask, mask, vt));
23862 /* The actual shuffle operations all operate on V16QImode. */
23863 op0 = gen_lowpart (V16QImode, op0);
23864 op1 = gen_lowpart (V16QImode, op1);
/* XOP: single three-input VPPERM does the whole job.  */
23868 if (GET_MODE (target) != V16QImode)
23869 target = gen_reg_rtx (V16QImode);
23870 emit_insn (gen_xop_pperm (target, op0, op1, mask));
23871 if (target != operands[0])
23872 emit_move_insn (operands[0],
23873 gen_lowpart (GET_MODE (operands[0]), target));
23875 else if (one_operand_shuffle)
23877 if (GET_MODE (target) != V16QImode)
23878 target = gen_reg_rtx (V16QImode);
23879 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
23880 if (target != operands[0])
23881 emit_move_insn (operands[0],
23882 gen_lowpart (GET_MODE (operands[0]), target));
23889 /* Shuffle the two input vectors independently. */
23890 t1 = gen_reg_rtx (V16QImode);
23891 t2 = gen_reg_rtx (V16QImode);
23892 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
23893 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
23896 /* Then merge them together. The key is whether any given control
23897 element contained a bit set that indicates the second word. */
23898 mask = operands[3];
23900 if (maskmode == V2DImode && !TARGET_SSE4_1)
23902 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
23903 more shuffle to convert the V2DI input mask into a V4SI
23904 input mask. At which point the masking that expand_int_vcond
23905 will work as desired. */
23906 rtx t3 = gen_reg_rtx (V4SImode);
23907 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
23908 const0_rtx, const0_rtx,
23909 const2_rtx, const2_rtx));
23911 maskmode = V4SImode;
/* Build the "selects second operand" bit mask and blend via vcond.  */
23915 for (i = 0; i < w; i++)
23917 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
23918 vt = force_reg (maskmode, vt);
23919 mask = expand_simple_binop (maskmode, AND, mask, vt,
23920 NULL_RTX, 0, OPTAB_DIRECT);
23922 if (GET_MODE (target) != mode)
23923 target = gen_reg_rtx (mode);
23925 xops[1] = gen_lowpart (mode, t2);
23926 xops[2] = gen_lowpart (mode, t1);
23927 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
23930 ok = ix86_expand_int_vcond (xops);
23932 if (target != operands[0])
23933 emit_move_insn (operands[0],
23934 gen_lowpart (GET_MODE (operands[0]), target));
23938 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
23939 true if we should do zero extension, else sign extension. HIGH_P is
23940 true if we want the N/2 high elements, else the low elements. */
23943 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
23945 machine_mode imode = GET_MODE (src);
/* Path 1 (the ISA guard is elided here): use PMOVZX/PMOVSX-style
   extension insns, extracting the high half first when HIGH_P.  */
23950 rtx (*unpack)(rtx, rtx);
23951 rtx (*extract)(rtx, rtx) = NULL;
23952 machine_mode halfmode = BLKmode;
/* Mode-keyed selection of the extend/extract generators; the switch's
   case labels are elided in this listing.  */
23958 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
23960 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
23961 halfmode = V32QImode;
23963 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
23967 unpack = gen_avx2_zero_extendv16qiv16hi2;
23969 unpack = gen_avx2_sign_extendv16qiv16hi2;
23970 halfmode = V16QImode;
23972 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
23976 unpack = gen_avx512f_zero_extendv16hiv16si2;
23978 unpack = gen_avx512f_sign_extendv16hiv16si2;
23979 halfmode = V16HImode;
23981 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
23985 unpack = gen_avx2_zero_extendv8hiv8si2;
23987 unpack = gen_avx2_sign_extendv8hiv8si2;
23988 halfmode = V8HImode;
23990 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
23994 unpack = gen_avx512f_zero_extendv8siv8di2;
23996 unpack = gen_avx512f_sign_extendv8siv8di2;
23997 halfmode = V8SImode;
23999 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
24003 unpack = gen_avx2_zero_extendv4siv4di2;
24005 unpack = gen_avx2_sign_extendv4siv4di2;
24006 halfmode = V4SImode;
24008 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
24012 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
24014 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
24018 unpack = gen_sse4_1_zero_extendv4hiv4si2;
24020 unpack = gen_sse4_1_sign_extendv4hiv4si2;
24024 unpack = gen_sse4_1_zero_extendv2siv2di2;
24026 unpack = gen_sse4_1_sign_extendv2siv2di2;
24029 gcc_unreachable ();
/* 256/512-bit sources: extract the requested half first.  */
24032 if (GET_MODE_SIZE (imode) >= 32)
24034 tmp = gen_reg_rtx (halfmode);
24035 emit_insn (extract (tmp, src));
/* 128-bit source, high half wanted: shift it into the low half.  */
24039 /* Shift higher 8 bytes to lower 8 bytes. */
24040 tmp = gen_reg_rtx (V1TImode);
24041 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
24043 tmp = gen_lowpart (imode, tmp);
24048 emit_insn (unpack (dest, tmp));
/* Path 2 (pre-SSE4.1): widen with interleave (punpckl/h) against either
   zeros (unsigned) or a sign mask built from a GT-against-zero compare.  */
24052 rtx (*unpack)(rtx, rtx, rtx);
24058 unpack = gen_vec_interleave_highv16qi;
24060 unpack = gen_vec_interleave_lowv16qi;
24064 unpack = gen_vec_interleave_highv8hi;
24066 unpack = gen_vec_interleave_lowv8hi;
24070 unpack = gen_vec_interleave_highv4si;
24072 unpack = gen_vec_interleave_lowv4si;
24075 gcc_unreachable ();
24079 tmp = force_reg (imode, CONST0_RTX (imode));
/* Sign mask: (0 > src) yields all-ones lanes for negative elements.  */
24081 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
24082 src, pc_rtx, pc_rtx);
24084 rtx tmp2 = gen_reg_rtx (imode);
24085 emit_insn (unpack (tmp2, src, tmp));
24086 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
24090 /* Expand conditional increment or decrement using adb/sbb instructions.
24091 The default case using setcc followed by the conditional move can be
24092 done by generic code. */
/* operands[0] = dest, operands[1] = comparison, operands[2] = source,
   operands[3] = +1/-1 increment; only +-1 is handled here.  Presumably
   returns whether the expansion succeeded -- the returns are elided in
   this listing.  */
24094 ix86_expand_int_addcc (rtx operands[])
24096 enum rtx_code code = GET_CODE (operands[1]);
24098 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
24100 rtx val = const0_rtx;
24101 bool fpcmp = false;
24103 rtx op0 = XEXP (operands[1], 0);
24104 rtx op1 = XEXP (operands[1], 1);
/* Only increments/decrements of exactly one are profitable via adc/sbb.  */
24106 if (operands[3] != const1_rtx
24107 && operands[3] != constm1_rtx)
/* The comparison must be expressible through the carry flag.  */
24109 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
24111 code = GET_CODE (compare_op);
24113 flags = XEXP (compare_op, 0);
24115 if (GET_MODE (flags) == CCFPmode
24116 || GET_MODE (flags) == CCFPUmode)
24119 code = ix86_fp_compare_code_to_integer (code);
/* FP comparisons must be reversed with unordered-awareness.  */
24126 PUT_CODE (compare_op,
24127 reverse_condition_maybe_unordered
24128 (GET_CODE (compare_op)));
24130 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
24133 mode = GET_MODE (operands[0]);
24135 /* Construct either adc or sbb insn. */
24136 if ((code == LTU) == (operands[3] == constm1_rtx))
/* sbb path (mode-keyed switch; case labels elided in this listing).  */
24141 insn = gen_subqi3_carry;
24144 insn = gen_subhi3_carry;
24147 insn = gen_subsi3_carry;
24150 insn = gen_subdi3_carry;
24153 gcc_unreachable ();
/* adc path.  */
24161 insn = gen_addqi3_carry;
24164 insn = gen_addhi3_carry;
24167 insn = gen_addsi3_carry;
24170 insn = gen_adddi3_carry;
24173 gcc_unreachable ();
24176 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
24182 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
24183 but works for floating pointer parameters and nonoffsetable memories.
24184 For pushes, it returns just stack offsets; the values will be saved
24185 in the right order. Maximally three parts are generated. */
/* Splits OPERAND of MODE into word-sized pieces stored in PARTS[] and
   presumably returns the part count (the return statements are elided
   in this listing; the caller uses the result as "nparts").  */
24188 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
/* 32-bit target: XFmode needs 3 SImode parts, others size/4.  */
24193 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
/* 64-bit target: parts are DImode-sized.  */
24195 size = (GET_MODE_SIZE (mode) + 4) / 8;
24197 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
24198 gcc_assert (size >= 2 && size <= 4);
24200 /* Optimize constant pool reference to immediates. This is used by fp
24201 moves, that force all constants to memory to allow combining. */
24202 if (MEM_P (operand) && MEM_READONLY_P (operand))
24204 rtx tmp = maybe_get_pool_constant (operand);
24209 if (MEM_P (operand) && !offsettable_memref_p (operand))
24211 /* The only non-offsetable memories we handle are pushes. */
24212 int ok = push_operand (operand, VOIDmode);
/* Pushes: each part is the same word_mode push destination.  */
24216 operand = copy_rtx (operand);
24217 PUT_MODE (operand, word_mode);
24218 parts[0] = parts[1] = parts[2] = parts[3] = operand;
24222 if (GET_CODE (operand) == CONST_VECTOR)
24224 machine_mode imode = int_mode_for_mode (mode);
24225 /* Caution: if we looked through a constant pool memory above,
24226 the operand may actually have a different mode now. That's
24227 ok, since we want to pun this all the way back to an integer. */
24228 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
24229 gcc_assert (operand != NULL);
/* --- 32-bit target splitting (SImode parts) --- */
24235 if (mode == DImode)
24236 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
24241 if (REG_P (operand))
/* Hard registers only exist consecutively after reload.  */
24243 gcc_assert (reload_completed);
24244 for (i = 0; i < size; i++)
24245 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
24247 else if (offsettable_memref_p (operand))
24249 operand = adjust_address (operand, SImode, 0);
24250 parts[0] = operand;
24251 for (i = 1; i < size; i++)
24252 parts[i] = adjust_address (operand, SImode, 4 * i);
24254 else if (CONST_DOUBLE_P (operand))
24256 const REAL_VALUE_TYPE *r;
24259 r = CONST_DOUBLE_REAL_VALUE (operand);
/* Mode-keyed constant splitting (case labels elided in this listing).  */
24263 real_to_target (l, r, mode);
24264 parts[3] = gen_int_mode (l[3], SImode);
24265 parts[2] = gen_int_mode (l[2], SImode);
24268 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
24269 long double may not be 80-bit. */
24270 real_to_target (l, r, mode);
24271 parts[2] = gen_int_mode (l[2], SImode);
24274 REAL_VALUE_TO_TARGET_DOUBLE (*r, l);
24277 gcc_unreachable ();
24279 parts[1] = gen_int_mode (l[1], SImode);
24280 parts[0] = gen_int_mode (l[0], SImode);
24283 gcc_unreachable ();
/* --- 64-bit target splitting (DImode low part) --- */
24288 if (mode == TImode)
24289 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
24290 if (mode == XFmode || mode == TFmode)
/* XFmode's upper part holds only 16 significant bits -> SImode.  */
24292 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
24293 if (REG_P (operand))
24295 gcc_assert (reload_completed);
24296 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
24297 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
24299 else if (offsettable_memref_p (operand))
24301 operand = adjust_address (operand, DImode, 0);
24302 parts[0] = operand;
24303 parts[1] = adjust_address (operand, upper_mode, 8);
24305 else if (CONST_DOUBLE_P (operand))
24309 real_to_target (l, CONST_DOUBLE_REAL_VALUE (operand), mode);
24311 /* real_to_target puts 32-bit pieces in each long. */
/* Reassemble two 32-bit pieces into one 64-bit immediate.  */
24314 ((l[0] & (HOST_WIDE_INT) 0xffffffff)
24315 | ((l[1] & (HOST_WIDE_INT) 0xffffffff) << 32),
24318 if (upper_mode == SImode)
24319 parts[1] = gen_int_mode (l[2], SImode);
24323 ((l[2] & (HOST_WIDE_INT) 0xffffffff)
24324 | ((l[3] & (HOST_WIDE_INT) 0xffffffff) << 32),
24328 gcc_unreachable ();
/* NOTE(review): this chunk is an elided extraction -- the leading numbers are
   original file line numbers and gaps between them mark missing lines
   (braces, else arms, returns).  Code below is kept byte-identical.  */
24335 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
24336 Return false when normal moves are needed; true when all required
24337 insns have been emitted. Operands 2-4 contain the input values
/* "in the correct order" (typo in original).  NOTE(review): the emission
   loop below writes operands[6 + i], so the outputs appear to occupy
   slots 6-8, not 5-7 as this comment says -- confirm against i386.md.  */
24338 int the correct order; operands 5-7 contain the output values. */
24341 ix86_split_long_move (rtx operands[])
24346 int collisions = 0;
24347 machine_mode mode = GET_MODE (operands[0]);
24348 bool collisionparts[4];
24350 /* The DFmode expanders may ask us to move double.
24351 For 64bit target this is single move. By hiding the fact
24352 here we simplify i386.md splitters. */
24353 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
24355 /* Optimize constant pool reference to immediates. This is used by
24356 fp moves, that force all constants to memory to allow combining. */
24358 if (MEM_P (operands[1])
24359 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
24360 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
24361 operands[1] = get_pool_constant (XEXP (operands[1], 0));
24362 if (push_operand (operands[0], VOIDmode))
24364 operands[0] = copy_rtx (operands[0]);
24365 PUT_MODE (operands[0], word_mode);
24368 operands[0] = gen_lowpart (DImode, operands[0]);
24369 operands[1] = gen_lowpart (DImode, operands[1]);
24370 emit_move_insn (operands[0], operands[1]);
24374 /* The only non-offsettable memory we handle is push. */
24375 if (push_operand (operands[0], VOIDmode))
24378 gcc_assert (!MEM_P (operands[0])
24379 || offsettable_memref_p (operands[0]));
/* Split both operands into word-sized parts; NPARTS is 2, 3 or 4.  */
24381 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
24382 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
24384 /* When emitting push, take care for source operands on the stack. */
24385 if (push && MEM_P (operands[1])
24386 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
24388 rtx src_base = XEXP (part[1][nparts - 1], 0);
24390 /* Compensate for the stack decrement by 4. */
24391 if (!TARGET_64BIT && nparts == 3
24392 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
24393 src_base = plus_constant (Pmode, src_base, 4);
24395 /* src_base refers to the stack pointer and is
24396 automatically decreased by emitted push. */
24397 for (i = 0; i < nparts; i++)
24398 part[1][i] = change_address (part[1][i],
24399 GET_MODE (part[1][i]), src_base);
24402 /* We need to do copy in the right order in case an address register
24403 of the source overlaps the destination. */
24404 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
/* Record, per part, whether the destination register clobbers the
   source address; COLLISIONS counts how many parts collide.  */
24408 for (i = 0; i < nparts; i++)
24411 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
24412 if (collisionparts[i])
24416 /* Collision in the middle part can be handled by reordering. */
24417 if (collisions == 1 && nparts == 3 && collisionparts [1])
24419 std::swap (part[0][1], part[0][2]);
24420 std::swap (part[1][1], part[1][2]);
24422 else if (collisions == 1
24424 && (collisionparts [1] || collisionparts [2]))
24426 if (collisionparts [1])
24428 std::swap (part[0][1], part[0][2]);
24429 std::swap (part[1][1], part[1][2]);
24433 std::swap (part[0][2], part[0][3]);
24434 std::swap (part[1][2], part[1][3]);
24438 /* If there are more collisions, we can't handle it by reordering.
24439 Do an lea to the last part and use only one colliding move. */
24440 else if (collisions > 1)
24442 rtx base, addr, tls_base = NULL_RTX;
24446 base = part[0][nparts - 1];
24448 /* Handle the case when the last part isn't valid for lea.
24449 Happens in 64-bit mode storing the 12-byte XFmode. */
24450 if (GET_MODE (base) != Pmode)
24451 base = gen_rtx_REG (Pmode, REGNO (base));
24453 addr = XEXP (part[1][0], 0);
24454 if (TARGET_TLS_DIRECT_SEG_REFS)
24456 struct ix86_address parts;
24457 int ok = ix86_decompose_address (addr, &parts);
24459 if (parts.seg == DEFAULT_TLS_SEG_REG)
24461 /* It is not valid to use %gs: or %fs: in
24462 lea though, so we need to remove it from the
24463 address used for lea and add it to each individual
24464 memory loads instead. */
24465 addr = copy_rtx (addr);
/* Walk the PLUS chain looking for the UNSPEC_TP term (the thread
   pointer) and splice it out of the lea address.  */
24467 while (GET_CODE (*x) == PLUS)
24469 for (i = 0; i < 2; i++)
24471 rtx u = XEXP (*x, i);
24472 if (GET_CODE (u) == ZERO_EXTEND)
24474 if (GET_CODE (u) == UNSPEC
24475 && XINT (u, 1) == UNSPEC_TP)
24477 tls_base = XEXP (*x, i);
24478 *x = XEXP (*x, 1 - i);
24486 gcc_assert (tls_base);
/* Materialize the (TLS-stripped) source address in BASE, then
   re-add the thread pointer to every individual memory access.  */
24489 emit_insn (gen_rtx_SET (base, addr));
24491 base = gen_rtx_PLUS (GET_MODE (base), base, tls_base);
24492 part[1][0] = replace_equiv_address (part[1][0], base);
24493 for (i = 1; i < nparts; i++)
24496 base = copy_rtx (base);
24497 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
24498 part[1][i] = replace_equiv_address (part[1][i], tmp);
/* Push path: for 12-byte XF padded to 16 bytes, pre-adjust %esp by -4
   so the three word pushes land correctly.  */
24509 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
24510 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
24511 stack_pointer_rtx, GEN_INT (-4)));
24512 emit_move_insn (part[0][2], part[1][2]);
24514 else if (nparts == 4)
24516 emit_move_insn (part[0][3], part[1][3]);
24517 emit_move_insn (part[0][2], part[1][2]);
24522 /* In 64bit mode we don't have 32bit push available. In case this is
24523 register, it is OK - we will just use larger counterpart. We also
24524 retype memory - these comes from attempt to avoid REX prefix on
24525 moving of second half of TFmode value. */
24526 if (GET_MODE (part[1][1]) == SImode)
24528 switch (GET_CODE (part[1][1]))
24531 part[1][1] = adjust_address (part[1][1], DImode, 0);
24535 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
/* Elided case labels above; default falls through to unreachable.  */
24539 gcc_unreachable ();
24542 if (GET_MODE (part[1][0]) == SImode)
24543 part[1][0] = part[1][1];
24546 emit_move_insn (part[0][1], part[1][1]);
24547 emit_move_insn (part[0][0], part[1][0]);
24551 /* Choose correct order to not overwrite the source before it is copied. */
24552 if ((REG_P (part[0][0])
24553 && REG_P (part[1][1])
24554 && (REGNO (part[0][0]) == REGNO (part[1][1])
24556 && REGNO (part[0][0]) == REGNO (part[1][2]))
24558 && REGNO (part[0][0]) == REGNO (part[1][3]))))
24560 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Collision: copy parts high-to-low.  */
24562 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
24564 operands[2 + i] = part[0][j];
24565 operands[6 + i] = part[1][j];
/* No collision: copy parts low-to-high.  */
24570 for (i = 0; i < nparts; i++)
24572 operands[2 + i] = part[0][i];
24573 operands[6 + i] = part[1][i];
24577 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
24578 if (optimize_insn_for_size_p ())
24580 for (j = 0; j < nparts - 1; j++)
24581 if (CONST_INT_P (operands[6 + j])
24582 && operands[6 + j] != const0_rtx
24583 && REG_P (operands[2 + j]))
24584 for (i = j; i < nparts - 1; i++)
24585 if (CONST_INT_P (operands[7 + i])
24586 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
24587 operands[7 + i] = operands[2 + j];
/* Finally emit the part-wise moves in the order chosen above.  */
24590 for (i = 0; i < nparts; i++)
24591 emit_move_insn (operands[2 + i], operands[6 + i]);
24596 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
24597 left shift by a constant, either using a single shift or
24598 a sequence of add instructions. */
/* NOTE(review): MODE here is the double-word mode being split, so when
   MODE == DImode the individual parts are SImode and the SImode add/shift
   generators are the right ones -- the apparent inversion below is
   intentional.  The guarding "if (count == 1 ..." line is elided in this
   extraction.  */
24601 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
24603 rtx (*insn)(rtx, rtx, rtx);
24606 || (count * ix86_cost->add <= ix86_cost->shift_const
24607 && !optimize_insn_for_size_p ()))
/* Cheap path: OPERAND += OPERAND, COUNT times (add is often faster than
   a shift-by-constant on some CPUs, per ix86_cost).  */
24609 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
24610 while (count-- > 0)
24611 emit_insn (insn (operand, operand, operand));
/* Otherwise a single shift-left by COUNT.  */
24615 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
24616 emit_insn (insn (operand, operand, GEN_INT (count)));
/* Split a double-word left shift (DImode on 32-bit, TImode on 64-bit)
   into half-word operations.  OPERANDS[0]/[1] are dest/src, OPERANDS[2]
   the shift count; SCRATCH is an optional spare register used with cmov.
   NOTE(review): elided extraction -- braces/else lines between the
   original line numbers are missing; code kept byte-identical.  */
24621 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
24623 rtx (*gen_ashl3)(rtx, rtx, rtx);
24624 rtx (*gen_shld)(rtx, rtx, rtx);
24625 int half_width = GET_MODE_BITSIZE (mode) >> 1;
24627 rtx low[2], high[2];
/* Constant shift count: resolve everything at expand time.  */
24630 if (CONST_INT_P (operands[2]))
24632 split_double_mode (mode, operands, 2, low, high);
24633 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
/* Count >= half width: low half becomes zero, high half gets the old
   low half shifted by the remainder.  */
24635 if (count >= half_width)
24637 emit_move_insn (high[0], low[1]);
24638 emit_move_insn (low[0], const0_rtx);
24640 if (count > half_width)
24641 ix86_expand_ashl_const (high[0], count - half_width, mode);
/* Count < half width: shld to carry bits into the high half, then
   shift the low half.  */
24645 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
24647 if (!rtx_equal_p (operands[0], operands[1]))
24648 emit_move_insn (operands[0], operands[1]);
24650 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
24651 ix86_expand_ashl_const (low[0], count, mode);
/* Variable shift count from here on.  */
24656 split_double_mode (mode, operands, 1, low, high);
24658 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
24660 if (operands[1] == const1_rtx)
24662 /* Assuming we've chosen a QImode capable registers, then 1 << N
24663 can be done with two 32/64-bit shifts, no branches, no cmoves. */
24664 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
24666 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
/* Set exactly one of low/high to 1 via setcc on (count & half_width),
   then shift both by the count.  */
24668 ix86_expand_clear (low[0]);
24669 ix86_expand_clear (high[0]);
24670 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
24672 d = gen_lowpart (QImode, low[0]);
24673 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
24674 s = gen_rtx_EQ (QImode, flags, const0_rtx);
24675 emit_insn (gen_rtx_SET (d, s));
24677 d = gen_lowpart (QImode, high[0]);
24678 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
24679 s = gen_rtx_NE (QImode, flags, const0_rtx);
24680 emit_insn (gen_rtx_SET (d, s));
24683 /* Otherwise, we can get the same results by manually performing
24684 a bit extract operation on bit 5/6, and then performing the two
24685 shifts. The two methods of getting 0/1 into low/high are exactly
24686 the same size. Avoiding the shift in the bit extract case helps
24687 pentium4 a bit; no one else seems to care much either way. */
24690 machine_mode half_mode;
24691 rtx (*gen_lshr3)(rtx, rtx, rtx);
24692 rtx (*gen_and3)(rtx, rtx, rtx);
24693 rtx (*gen_xor3)(rtx, rtx, rtx);
24694 HOST_WIDE_INT bits;
/* Pick half-word generators; the else arm (TImode -> DImode halves)
   is partially elided here.  */
24697 if (mode == DImode)
24699 half_mode = SImode;
24700 gen_lshr3 = gen_lshrsi3;
24701 gen_and3 = gen_andsi3;
24702 gen_xor3 = gen_xorsi3;
24707 half_mode = DImode;
24708 gen_lshr3 = gen_lshrdi3;
24709 gen_and3 = gen_anddi3;
24710 gen_xor3 = gen_xordi3;
24714 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
24715 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
24717 x = gen_lowpart (half_mode, operands[2]);
/* Extract bit 5 (DImode) / 6 (TImode) of the count into high, put its
   complement into low, then shift both halves by the count.  */
24718 emit_insn (gen_rtx_SET (high[0], x));
24720 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
24721 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
24722 emit_move_insn (low[0], high[0]);
24723 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
24726 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
24727 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
24731 if (operands[1] == constm1_rtx)
24733 /* For -1 << N, we can avoid the shld instruction, because we
24734 know that we're shifting 0...31/63 ones into a -1. */
24735 emit_move_insn (low[0], constm1_rtx);
24736 if (optimize_insn_for_size_p ())
24737 emit_move_insn (high[0], low[0]);
24739 emit_move_insn (high[0], constm1_rtx);
/* General case: shld + shift, then fix up when count >= half width.  */
24743 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
24745 if (!rtx_equal_p (operands[0], operands[1]))
24746 emit_move_insn (operands[0], operands[1]);
24748 split_double_mode (mode, operands, 1, low, high);
24749 emit_insn (gen_shld (high[0], low[0], operands[2]));
24752 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
/* Adjustment for count >= half width: cmov-based with SCRATCH, or a
   branchy fallback otherwise.  */
24754 if (TARGET_CMOVE && scratch)
24756 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
24757 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
24759 ix86_expand_clear (scratch);
24760 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
24764 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
24765 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
24767 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into half-word operations.
   Mirrors ix86_split_ashl: OPERANDS[0]/[1] dest/src, OPERANDS[2] count,
   SCRATCH optional for the cmov-based fixup.  NOTE(review): elided
   extraction; code kept byte-identical.  */
24772 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
24774 rtx (*gen_ashr3)(rtx, rtx, rtx)
24775 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
24776 rtx (*gen_shrd)(rtx, rtx, rtx);
24777 int half_width = GET_MODE_BITSIZE (mode) >> 1;
24779 rtx low[2], high[2];
24782 if (CONST_INT_P (operands[2]))
24784 split_double_mode (mode, operands, 2, low, high);
24785 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
/* Shift by width-1: both halves become the sign bit broadcast.  */
24787 if (count == GET_MODE_BITSIZE (mode) - 1)
24789 emit_move_insn (high[0], high[1]);
24790 emit_insn (gen_ashr3 (high[0], high[0],
24791 GEN_INT (half_width - 1)));
24792 emit_move_insn (low[0], high[0]);
/* Count >= half width: low gets old high shifted; high is sign-filled.  */
24795 else if (count >= half_width)
24797 emit_move_insn (low[0], high[1]);
24798 emit_move_insn (high[0], low[0]);
24799 emit_insn (gen_ashr3 (high[0], high[0],
24800 GEN_INT (half_width - 1)));
24802 if (count > half_width)
24803 emit_insn (gen_ashr3 (low[0], low[0],
24804 GEN_INT (count - half_width)));
/* Count < half width: shrd into the low half, arithmetic shift high.  */
24808 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
24810 if (!rtx_equal_p (operands[0], operands[1]))
24811 emit_move_insn (operands[0], operands[1]);
24813 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
24814 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + sar, then fix up for count >= half width.  */
24819 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
24821 if (!rtx_equal_p (operands[0], operands[1]))
24822 emit_move_insn (operands[0], operands[1]);
24824 split_double_mode (mode, operands, 1, low, high);
24826 emit_insn (gen_shrd (low[0], high[0], operands[2]));
24827 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
24829 if (TARGET_CMOVE && scratch)
/* SCRATCH holds the sign extension of the high half for the cmov fixup.  */
24831 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
24832 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
24834 emit_move_insn (scratch, high[0]);
24835 emit_insn (gen_ashr3 (scratch, scratch,
24836 GEN_INT (half_width - 1)));
24837 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
24842 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
24843 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
24845 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into half-word operations.
   Same contract as ix86_split_ashr, but the vacated high half is
   zero-filled rather than sign-filled.  NOTE(review): elided extraction;
   code kept byte-identical.  */
24851 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
24853 rtx (*gen_lshr3)(rtx, rtx, rtx)
24854 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
24855 rtx (*gen_shrd)(rtx, rtx, rtx);
24856 int half_width = GET_MODE_BITSIZE (mode) >> 1;
24858 rtx low[2], high[2];
24861 if (CONST_INT_P (operands[2]))
24863 split_double_mode (mode, operands, 2, low, high);
24864 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
/* Count >= half width: low gets old high; high becomes zero.  */
24866 if (count >= half_width)
24868 emit_move_insn (low[0], high[1]);
24869 ix86_expand_clear (high[0]);
24871 if (count > half_width)
24872 emit_insn (gen_lshr3 (low[0], low[0],
24873 GEN_INT (count - half_width)));
/* Count < half width: shrd into low, logical shift high.  */
24877 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
24879 if (!rtx_equal_p (operands[0], operands[1]))
24880 emit_move_insn (operands[0], operands[1]);
24882 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
24883 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + shr, then fix up for count >= half width.  */
24888 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
24890 if (!rtx_equal_p (operands[0], operands[1]))
24891 emit_move_insn (operands[0], operands[1]);
24893 split_double_mode (mode, operands, 1, low, high);
24895 emit_insn (gen_shrd (low[0], high[0], operands[2]));
24896 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
/* Heuristic: cmov-based fixup needs a zeroed scratch register.  */
24898 if (TARGET_CMOVE && scratch)
24900 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
24901 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
24903 ix86_expand_clear (scratch);
24904 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
24909 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
24910 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
24912 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
24917 /* Predict just emitted jump instruction to be taken with probability PROB. */
/* PROB is measured against REG_BR_PROB_BASE; attaches a REG_BR_PROB note
   to the most recently emitted insn, which must be a jump.  */
24919 predict_jump (int prob)
24921 rtx insn = get_last_insn ();
24922 gcc_assert (JUMP_P (insn));
24923 add_int_reg_note (insn, REG_BR_PROB, prob);
24926 /* Helper function for the string operations below. Dest VARIABLE whether
24927 it is aligned to VALUE bytes. If true, jump to the label. */
/* ("Test VARIABLE..." -- typo in the original comment.)  Emits
   tmp = VARIABLE & VALUE; if (tmp == 0) goto label;  Returns the label
   so the caller can emit the misaligned fixup and then bind it.
   EPILOGUE selects the branch-probability hint (50% vs 90%).  */
24928 static rtx_code_label *
24929 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
24931 rtx_code_label *label = gen_label_rtx ();
24932 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
24933 if (GET_MODE (variable) == DImode)
24934 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
24936 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
24937 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
24940 predict_jump (REG_BR_PROB_BASE * 50 / 100);
24942 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24946 /* Adjust COUNTER by the VALUE. */
/* Decrements COUNTREG by VALUE (emits countreg += -VALUE), picking the
   add pattern matching the register's mode.  */
24948 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
24950 rtx (*gen_add)(rtx, rtx, rtx)
24951 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
24953 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
24956 /* Zero extend possibly SImode EXP to Pmode register. */
/* Returns a fresh Pmode pseudo holding the zero-extended value.  */
24958 ix86_zero_extend_to_Pmode (rtx exp)
24960 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
24963 /* Divide COUNTREG by SCALE. */
/* Constant counts are divided at expand time; otherwise a logical
   right shift by log2(SCALE) is emitted (SCALE must be a power of 2
   for exact_log2 to be meaningful).  An early-out for SCALE == 1 is
   elided in this extraction.  */
24965 scale_counter (rtx countreg, int scale)
24971 if (CONST_INT_P (countreg))
24972 return GEN_INT (INTVAL (countreg) / scale);
24973 gcc_assert (REG_P (countreg));
24975 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
24976 GEN_INT (exact_log2 (scale)),
24977 NULL, 1, OPTAB_DIRECT);
24981 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
24982 DImode for constant loop counts. */
/* VOIDmode COUNT_EXP means a constant; a 64-bit constant that does not
   fit in 32 bits forces DImode (the return lines for the SImode/DImode
   results are elided in this extraction).  */
24984 static machine_mode
24985 counter_mode (rtx count_exp)
24987 if (GET_MODE (count_exp) != VOIDmode)
24988 return GET_MODE (count_exp);
24989 if (!CONST_INT_P (count_exp))
24991 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
24996 /* Copy the address to a Pmode register. This is used for x32 to
24997 truncate DImode TLS address to a SImode register. */
/* Marks the resulting register REG_POINTER so alias/alignment code can
   treat it as a base address.  On x32 (Pmode == SImode) the DImode
   address is copied whole and a SImode SUBREG of it is returned.  */
25000 ix86_copy_addr_to_reg (rtx addr)
25003 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
25005 reg = copy_addr_to_reg (addr);
25006 REG_POINTER (reg) = 1;
25011 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
25012 reg = copy_to_mode_reg (DImode, addr);
25013 REG_POINTER (reg) = 1;
25014 return gen_rtx_SUBREG (SImode, reg, 0);
25018 /* When ISSETMEM is FALSE, output simple loop to move memory pointer to SRCPTR
25019 to DESTPTR via chunks of MODE unrolled UNROLL times, overall size is COUNT
25020 specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set
25021 memory by VALUE (supposed to be in MODE).
25023 The size is rounded down to whole number of chunk size moved at once.
25024 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
/* NOTE(review): elided extraction; code kept byte-identical.  */
25028 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
25029 rtx destptr, rtx srcptr, rtx value,
25030 rtx count, machine_mode mode, int unroll,
25031 int expected_size, bool issetmem)
25033 rtx_code_label *out_label, *top_label;
25035 machine_mode iter_mode = counter_mode (count);
25036 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
25037 rtx piece_size = GEN_INT (piece_size_n);
25038 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
25042 top_label = gen_label_rtx ();
25043 out_label = gen_label_rtx ();
25044 iter = gen_reg_rtx (iter_mode);
/* SIZE = COUNT rounded down to a multiple of the per-iteration chunk.  */
25046 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
25047 NULL, 1, OPTAB_DIRECT);
25048 /* Those two should combine. */
25049 if (piece_size == const1_rtx)
/* Skip the loop entirely when no full chunk fits.  */
25051 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
25053 predict_jump (REG_BR_PROB_BASE * 10 / 100);
25055 emit_move_insn (iter, const0_rtx);
25057 emit_label (top_label);
25059 tmp = convert_modes (Pmode, iter_mode, iter, true);
25061 /* This assert could be relaxed - in this case we'll need to compute
25062 smallest power of two, containing in PIECE_SIZE_N and pass it to
25064 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
25065 destmem = offset_address (destmem, tmp, piece_size_n);
25066 destmem = adjust_address (destmem, mode, 0);
/* Copy path only: compute the matching source address.  */
25070 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
25071 srcmem = adjust_address (srcmem, mode, 0);
25073 /* When unrolling for chips that reorder memory reads and writes,
25074 we can save registers by using single temporary.
25075 Also using 4 temporaries is overkill in 32bit mode. */
/* NOTE(review): the "&& 0" disables this single-temporary path; it is
   dead code kept for reference.  */
25076 if (!TARGET_64BIT && 0)
25078 for (i = 0; i < unroll; i++)
25083 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25085 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
25087 emit_move_insn (destmem, srcmem);
/* Live path: load all UNROLL chunks into temporaries first, then store,
   so loads and stores are not interleaved.  */
25093 gcc_assert (unroll <= 4);
25094 for (i = 0; i < unroll; i++)
25096 tmpreg[i] = gen_reg_rtx (mode);
25100 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
25102 emit_move_insn (tmpreg[i], srcmem);
25104 for (i = 0; i < unroll; i++)
25109 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25111 emit_move_insn (destmem, tmpreg[i]);
/* Memset path: store VALUE to each chunk.  */
25116 for (i = 0; i < unroll; i++)
25120 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25121 emit_move_insn (destmem, value);
/* Advance the iterator and loop back while ITER < SIZE.  */
25124 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
25125 true, OPTAB_LIB_WIDEN);
25127 emit_move_insn (iter, tmp);
25129 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Derive a branch-probability hint from the caller's expected size.  */
25131 if (expected_size != -1)
25133 expected_size /= GET_MODE_SIZE (mode) * unroll;
25134 if (expected_size == 0)
25136 else if (expected_size > REG_BR_PROB_BASE)
25137 predict_jump (REG_BR_PROB_BASE - 1);
25139 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
25142 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Finally advance DESTPTR (and SRCPTR for copies) past the moved bytes.  */
25143 iter = ix86_zero_extend_to_Pmode (iter);
25144 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
25145 true, OPTAB_LIB_WIDEN);
25146 if (tmp != destptr)
25147 emit_move_insn (destptr, tmp);
25150 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
25151 true, OPTAB_LIB_WIDEN);
25153 emit_move_insn (srcptr, tmp);
25155 emit_label (out_label);
25158 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
25159 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
25160 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
25161 For setmem case, VALUE is a promoted to a wider size ORIG_VALUE.
25162 ORIG_VALUE is the original value passed to memset to fill the memory with.
25163 Other arguments have same meaning as for previous function. */
/* NOTE(review): elided extraction; code kept byte-identical.  */
25166 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
25167 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
25169 machine_mode mode, bool issetmem)
25174 HOST_WIDE_INT rounded_count;
25176 /* If possible, it is shorter to use rep movs.
25177 TODO: Maybe it is better to move this logic to decide_alg. */
25178 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
25179 && (!issetmem || orig_value == const0_rtx))
/* Normalize DESTMEM to a BLKmode reference based at DESTPTR.  */
25182 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
25183 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
/* Scale the byte count down to a chunk count for the rep prefix.  */
25185 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
25186 GET_MODE_SIZE (mode)));
/* DESTEXP expresses the final destination pointer value for the
   rep pattern (count scaled back to bytes, added to DESTPTR).  */
25187 if (mode != QImode)
25189 destexp = gen_rtx_ASHIFT (Pmode, countreg,
25190 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
25191 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
25194 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
/* Keep MEM_SIZE accurate when the byte count is known; otherwise clear
   a stale size so aliasing info stays conservative.  */
25195 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
25198 = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
25199 destmem = shallow_copy_rtx (destmem);
25200 set_mem_size (destmem, rounded_count);
25202 else if (MEM_SIZE_KNOWN_P (destmem))
25203 clear_mem_size (destmem);
/* Memset path: rep stos with the promoted fill value.  */
25207 value = force_reg (mode, gen_lowpart (mode, value));
25208 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Memcpy path: same normalization for the source side, then rep mov.  */
25212 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
25213 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
25214 if (mode != QImode)
25216 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
25217 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
25218 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
25221 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
25222 if (CONST_INT_P (count))
25225 = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
25226 srcmem = shallow_copy_rtx (srcmem);
25227 set_mem_size (srcmem, rounded_count);
25231 if (MEM_SIZE_KNOWN_P (srcmem))
25232 clear_mem_size (srcmem);
25234 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
25239 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
/* ...DESTMEM (destination line elided in this extraction).  */
25241 SRC is passed by pointer to be updated on return.
25242 Return value is updated DST. */
25244 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
25245 HOST_WIDE_INT size_to_move)
25247 rtx dst = destmem, src = *srcmem, adjust, tempreg;
25248 enum insn_code code;
25249 machine_mode move_mode;
25252 /* Find the widest mode in which we could perform moves.
25253 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
25254 it until move of such size is supported. */
25255 piece_size = 1 << floor_log2 (size_to_move);
25256 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
25257 code = optab_handler (mov_optab, move_mode);
25258 while (code == CODE_FOR_nothing && piece_size > 1)
25261 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
25262 code = optab_handler (mov_optab, move_mode);
25265 /* Find the corresponding vector mode with the same size as MOVE_MODE.
25266 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
25267 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
25269 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
25270 move_mode = mode_for_vector (word_mode, nunits);
25271 code = optab_handler (mov_optab, move_mode);
/* No vector move of that width: fall back to scalar word moves.  */
25272 if (code == CODE_FOR_nothing)
25274 move_mode = word_mode;
25275 piece_size = GET_MODE_SIZE (move_mode);
25276 code = optab_handler (mov_optab, move_mode);
25279 gcc_assert (code != CODE_FOR_nothing);
25281 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
25282 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
25284 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
25285 gcc_assert (size_to_move % piece_size == 0);
25286 adjust = GEN_INT (piece_size);
25287 for (i = 0; i < size_to_move; i += piece_size)
25289 /* We move from memory to memory, so we'll need to do it via
25290 a temporary register. */
25291 tempreg = gen_reg_rtx (move_mode);
25292 emit_insn (GEN_FCN (code) (tempreg, src));
25293 emit_insn (GEN_FCN (code) (dst, tempreg));
/* Bump both pointers past the piece just copied.  */
25295 emit_move_insn (destptr,
25296 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
25297 emit_move_insn (srcptr,
25298 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
25300 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
25302 src = adjust_automodify_address_nv (src, move_mode, srcptr,
25306 /* Update DST and SRC rtx. */
25311 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
/* NOTE(review): elided extraction; code kept byte-identical.  Three
   strategies: constant count -> unrolled emit_memmov per set bit; large
   MAX_SIZE -> byte loop; otherwise alignment-tested tail moves of
   4/2/1 bytes.  */
25313 expand_movmem_epilogue (rtx destmem, rtx srcmem,
25314 rtx destptr, rtx srcptr, rtx count, int max_size)
25317 if (CONST_INT_P (count))
25319 HOST_WIDE_INT countval = INTVAL (count);
25320 HOST_WIDE_INT epilogue_size = countval % max_size;
25323 /* For now MAX_SIZE should be a power of 2. This assert could be
25324 relaxed, but it'll require a bit more complicated epilogue
25326 gcc_assert ((max_size & (max_size - 1)) == 0);
/* Copy one piece per set bit of the residual size, widest first.  */
25327 for (i = max_size; i >= 1; i >>= 1)
25329 if (epilogue_size & i)
25330 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
/* Unknown count and big residual bound: mask the count and reuse the
   generic byte-copy loop.  */
25336 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
25337 count, 1, OPTAB_DIRECT);
25338 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
25339 count, QImode, 1, 4, false);
25343 /* When there are stringops, we can cheaply increase dest and src pointers.
25344 Otherwise we save code size by maintaining offset (zero is readily
25345 available from preceding rep operation) and using x86 addressing modes.
25347 if (TARGET_SINGLE_STRINGOP)
/* strmov variants auto-increment the pointers.  */
25351 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
25352 src = change_address (srcmem, SImode, srcptr);
25353 dest = change_address (destmem, SImode, destptr);
25354 emit_insn (gen_strmov (destptr, dest, srcptr, src));
25355 emit_label (label);
25356 LABEL_NUSES (label) = 1;
25360 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
25361 src = change_address (srcmem, HImode, srcptr);
25362 dest = change_address (destmem, HImode, destptr);
25363 emit_insn (gen_strmov (destptr, dest, srcptr, src));
25364 emit_label (label);
25365 LABEL_NUSES (label) = 1;
25369 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
25370 src = change_address (srcmem, QImode, srcptr);
25371 dest = change_address (destmem, QImode, destptr);
25372 emit_insn (gen_strmov (destptr, dest, srcptr, src));
25373 emit_label (label);
25374 LABEL_NUSES (label) = 1;
/* No stringops: keep a running OFFSET register and use base+offset
   addressing instead of bumping the pointers.  */
25379 rtx offset = force_reg (Pmode, const0_rtx);
25384 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
25385 src = change_address (srcmem, SImode, srcptr);
25386 dest = change_address (destmem, SImode, destptr);
25387 emit_move_insn (dest, src);
25388 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
25389 true, OPTAB_LIB_WIDEN);
25391 emit_move_insn (offset, tmp);
25392 emit_label (label);
25393 LABEL_NUSES (label) = 1;
25397 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
25398 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
25399 src = change_address (srcmem, HImode, tmp);
25400 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
25401 dest = change_address (destmem, HImode, tmp);
25402 emit_move_insn (dest, src);
25403 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
25404 true, OPTAB_LIB_WIDEN);
25406 emit_move_insn (offset, tmp);
25407 emit_label (label);
25408 LABEL_NUSES (label) = 1;
25412 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
25413 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
25414 src = change_address (srcmem, QImode, tmp);
25415 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
25416 dest = change_address (destmem, QImode, tmp);
25417 emit_move_insn (dest, src);
25418 emit_label (label);
25419 LABEL_NUSES (label) = 1;
25424 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
25425 with value PROMOTED_VAL.
/* NOTE(review): the two lines below look copied from emit_memmov -- this
   function takes no SRC and returns the updated DST only.  */
25426 SRC is passed by pointer to be updated on return.
25427 Return value is updated DST. */
25429 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
25430 HOST_WIDE_INT size_to_move)
25432 rtx dst = destmem, adjust;
25433 enum insn_code code;
25434 machine_mode move_mode;
25437 /* Find the widest mode in which we could perform moves.
25438 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
25439 it until move of such size is supported. */
25440 move_mode = GET_MODE (promoted_val);
25441 if (move_mode == VOIDmode)
25442 move_mode = QImode;
/* Narrow the store when the residual is smaller than the promoted
   value (e.g. a 1-byte tail of a word-promoted fill).  */
25443 if (size_to_move < GET_MODE_SIZE (move_mode))
25445 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
25446 promoted_val = gen_lowpart (move_mode, promoted_val);
25448 piece_size = GET_MODE_SIZE (move_mode);
25449 code = optab_handler (mov_optab, move_mode);
25450 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
25452 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
25454 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
25455 gcc_assert (size_to_move % piece_size == 0);
25456 adjust = GEN_INT (piece_size);
25457 for (i = 0; i < size_to_move; i += piece_size)
/* strset auto-increments DESTPTR; wider-than-word pieces use a plain
   store followed by an explicit pointer bump.  */
25459 if (piece_size <= GET_MODE_SIZE (word_mode))
25461 emit_insn (gen_strset (destptr, dst, promoted_val));
25462 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
25467 emit_insn (GEN_FCN (code) (dst, promoted_val));
25469 emit_move_insn (destptr,
25470 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
25472 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
25476 /* Update DST rtx. */
25479 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* Masks COUNT to the residual byte count, then reuses the generic
   byte-store loop (ISSETMEM path of expand_set_or_movmem_via_loop).  */
25481 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
25482 rtx count, int max_size)
25485 expand_simple_binop (counter_mode (count), AND, count,
25486 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
25487 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
25488 gen_lowpart (QImode, value), count, QImode,
25489 1, max_size / 2, true);
25492 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* NOTE(review): sampled extract -- braces and some declarations are missing;
   only visible statements are documented.  */
25494 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
25495 rtx count, int max_size)
/* Constant count: emit an unconditional sequence of power-of-two sized
   stores covering exactly COUNT % MAX_SIZE bytes.  */
25499 if (CONST_INT_P (count))
25501 HOST_WIDE_INT countval = INTVAL (count);
25502 HOST_WIDE_INT epilogue_size = countval % max_size;
25505 /* For now MAX_SIZE should be a power of 2. This assert could be
25506 relaxed, but it'll require a bit more complicated epilogue
25508 gcc_assert ((max_size & (max_size - 1)) == 0);
/* Walk power-of-two chunk sizes from large to small, emitting a store for
   each bit set in the residual size.  */
25509 for (i = max_size; i >= 1; i >>= 1)
25511 if (epilogue_size & i)
/* Prefer the vector-promoted value for chunks wider than the scalar.  */
25513 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
25514 destmem = emit_memset (destmem, destptr, vec_value, i);
25516 destmem = emit_memset (destmem, destptr, value, i);
/* Fallback path (presumably when promotion is unavailable -- TODO confirm
   against the elided condition): use a byte loop for the tail.  */
25523 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Variable count: a chain of alignment tests, each guarding stores for one
   power-of-two size (16, 8, 4, 2, 1 bytes).  */
25528 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
25531 dest = change_address (destmem, DImode, destptr);
25532 emit_insn (gen_strset (destptr, dest, value));
25533 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
25534 emit_insn (gen_strset (destptr, dest, value));
/* Non-64-bit path (guarding condition elided in this extract): four SImode
   stores instead of two DImode ones.  */
25538 dest = change_address (destmem, SImode, destptr);
25539 emit_insn (gen_strset (destptr, dest, value));
25540 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
25541 emit_insn (gen_strset (destptr, dest, value));
25542 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
25543 emit_insn (gen_strset (destptr, dest, value));
25544 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
25545 emit_insn (gen_strset (destptr, dest, value));
25547 emit_label (label);
25548 LABEL_NUSES (label) = 1;
/* 8-byte chunk.  */
25552 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
25555 dest = change_address (destmem, DImode, destptr);
25556 emit_insn (gen_strset (destptr, dest, value));
25560 dest = change_address (destmem, SImode, destptr);
25561 emit_insn (gen_strset (destptr, dest, value));
25562 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
25563 emit_insn (gen_strset (destptr, dest, value));
25565 emit_label (label);
25566 LABEL_NUSES (label) = 1;
/* 4-, 2- and 1-byte chunks use lowparts of the promoted value.  */
25570 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
25571 dest = change_address (destmem, SImode, destptr);
25572 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
25573 emit_label (label);
25574 LABEL_NUSES (label) = 1;
25578 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
25579 dest = change_address (destmem, HImode, destptr);
25580 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
25581 emit_label (label);
25582 LABEL_NUSES (label) = 1;
25586 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
25587 dest = change_address (destmem, QImode, destptr);
25588 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
25589 emit_label (label);
25590 LABEL_NUSES (label) = 1;
25594 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
25595 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
25596 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
25598 Return value is updated DESTMEM. */
25600 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
25601 rtx destptr, rtx srcptr, rtx value,
25602 rtx vec_value, rtx count, int align,
25603 int desired_alignment, bool issetmem)
/* For each power-of-two I between current and desired alignment, test the
   corresponding bit of DESTPTR and, when set, copy/set I bytes to advance
   the pointer to the next I*2 boundary.  */
25606 for (i = 1; i < desired_alignment; i <<= 1)
25610 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
25613 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
25614 destmem = emit_memset (destmem, destptr, vec_value, i);
25616 destmem = emit_memset (destmem, destptr, value, i);
/* memcpy variant of the same step.  */
25619 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
/* Account for the bytes just handled so the main loop's COUNT stays right.  */
25620 ix86_adjust_counter (count, i);
25621 emit_label (label);
25622 LABEL_NUSES (label) = 1;
/* After this step the destination is known aligned to i*2 bytes.  */
25623 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
25629 /* Test if COUNT&SIZE is nonzero and if so, expand movme
25630 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
25631 and jump to DONE_LABEL. */
/* NOTE(review): sampled extract; guarding conditions for some branches are
   elided, so comments describe only visible statements.  */
25633 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
25634 rtx destptr, rtx srcptr,
25635 rtx value, rtx vec_value,
25636 rtx count, int size,
25637 rtx done_label, bool issetmem)
25639 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
25640 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
25644 /* If we do not have vector value to copy, we must reduce size. */
/* Pick the widest scalar mode not exceeding the promoted value's width.  */
25649 if (GET_MODE (value) == VOIDmode && size > 8)
25651 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
25652 mode = GET_MODE (value);
25655 mode = GET_MODE (vec_value), value = vec_value;
25659 /* Choose appropriate vector mode. */
25661 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
25662 else if (size >= 16)
25663 mode = TARGET_SSE ? V16QImode : DImode;
25664 srcmem = change_address (srcmem, mode, srcptr);
25666 destmem = change_address (destmem, mode, destptr);
25667 modesize = GEN_INT (GET_MODE_SIZE (mode));
25668 gcc_assert (GET_MODE_SIZE (mode) <= size);
/* First pass: cover the leading SIZE bytes with MODE-sized moves/stores.  */
25669 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
25672 emit_move_insn (destmem, gen_lowpart (mode, value))
25675 emit_move_insn (destmem, srcmem);
25676 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
25678 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
/* Reposition to COUNT - 2*SIZE so the second pass covers the trailing
   bytes; the two passes may overlap, which is harmless for copies/sets.  */
25681 destmem = offset_address (destmem, count, 1);
25682 destmem = offset_address (destmem, GEN_INT (-2 * size),
25683 GET_MODE_SIZE (mode));
25686 srcmem = offset_address (srcmem, count, 1);
25687 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
25688 GET_MODE_SIZE (mode));
/* Second pass: the trailing SIZE bytes.  */
25690 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
25693 emit_move_insn (destmem, gen_lowpart (mode, value));
25696 emit_move_insn (destmem, srcmem);
25697 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
25699 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
/* The whole block is handled; skip the rest of the expansion.  */
25701 emit_jump_insn (gen_jump (done_label));
25704 emit_label (label);
25705 LABEL_NUSES (label) = 1;
25708 /* Handle small memcpy (up to SIZE that is supposed to be small power of 2.
25709 and get ready for the main memcpy loop by copying iniital DESIRED_ALIGN-ALIGN
25710 bytes and last SIZE bytes adjusitng DESTPTR/SRCPTR/COUNT in a way we can
25711 proceed with an loop copying SIZE bytes at once. Do moves in MODE.
25712 DONE_LABEL is a label after the whole copying sequence. The label is created
25713 on demand if *DONE_LABEL is NULL.
25714 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
25715 bounds after the initial copies.
25717 DESTMEM/SRCMEM are memory expressions pointing to the copies block,
25718 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether
25719 we will dispatch to a library call for large blocks.
25721 In pseudocode we do:
25725 Assume that SIZE is 4. Bigger sizes are handled analogously
25728 copy 4 bytes from SRCPTR to DESTPTR
25729 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
25734 copy 1 byte from SRCPTR to DESTPTR
25737 copy 2 bytes from SRCPTR to DESTPTR
25738 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
25743 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
25744 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
25746 OLD_DESPTR = DESTPTR;
25747 Align DESTPTR up to DESIRED_ALIGN
25748 SRCPTR += DESTPTR - OLD_DESTPTR
25749 COUNT -= DEST_PTR - OLD_DESTPTR
25751 Round COUNT down to multiple of SIZE
25752 << optional caller supplied zero size guard is here >>
25753 << optional caller supplied dynamic check is here >>
25754 << caller supplied main copy loop is here >>
/* NOTE(review): sampled extract -- parameter list and several statements are
   missing lines; comments below are limited to what is visible.  */
25759 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
25760 rtx *destptr, rtx *srcptr,
25762 rtx value, rtx vec_value,
25764 rtx_code_label **done_label,
25768 unsigned HOST_WIDE_INT *min_size,
25769 bool dynamic_check,
25772 rtx_code_label *loop_label = NULL, *label;
25775 int prolog_size = 0;
25778 /* Chose proper value to copy. */
25779 if (issetmem && VECTOR_MODE_P (mode))
25780 mode_value = vec_value;
25782 mode_value = value;
25783 gcc_assert (GET_MODE_SIZE (mode) <= size);
25785 /* See if block is big or small, handle small blocks. */
25786 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
25789 loop_label = gen_label_rtx ();
25792 *done_label = gen_label_rtx ();
/* Dispatch: blocks >= 2*SIZE go to the main path at LOOP_LABEL.  */
25794 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
25798 /* Handle sizes > 3. */
25799 for (;size2 > 2; size2 >>= 1)
25800 expand_small_movmem_or_setmem (destmem, srcmem,
25804 size2, *done_label, issetmem)
25805 /* Nothing to copy? Jump to DONE_LABEL if so */
25806 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
25809 /* Do a byte copy. */
25810 destmem = change_address (destmem, QImode, *destptr);
25812 emit_move_insn (destmem, gen_lowpart (QImode, value));
25815 srcmem = change_address (srcmem, QImode, *srcptr);
25816 emit_move_insn (destmem, srcmem);
25819 /* Handle sizes 2 and 3. */
/* One HImode move at the start plus one at COUNT-2 covers 2 or 3 bytes
   (the moves overlap when COUNT == 3).  */
25820 label = ix86_expand_aligntest (*count, 2, false);
25821 destmem = change_address (destmem, HImode, *destptr);
25822 destmem = offset_address (destmem, *count, 1);
25823 destmem = offset_address (destmem, GEN_INT (-2), 2);
25825 emit_move_insn (destmem, gen_lowpart (HImode, value));
25828 srcmem = change_address (srcmem, HImode, *srcptr);
25829 srcmem = offset_address (srcmem, *count, 1);
25830 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
25831 emit_move_insn (destmem, srcmem);
25834 emit_label (label);
25835 LABEL_NUSES (label) = 1;
25836 emit_jump_insn (gen_jump (*done_label));
/* When no runtime small-block dispatch was emitted, the bounds must already
   guarantee COUNT >= SIZE.  */
25840 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
25841 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
25843 /* Start memcpy for COUNT >= SIZE. */
25846 emit_label (loop_label);
25847 LABEL_NUSES (loop_label) = 1;
25850 /* Copy first desired_align bytes. */
25852 srcmem = change_address (srcmem, mode, *srcptr);
25853 destmem = change_address (destmem, mode, *destptr);
25854 modesize = GEN_INT (GET_MODE_SIZE (mode));
25855 for (n = 0; prolog_size < desired_align - align; n++)
25858 emit_move_insn (destmem, mode_value);
25861 emit_move_insn (destmem, srcmem);
25862 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
25864 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
25865 prolog_size += GET_MODE_SIZE (mode);
25869 /* Copy last SIZE bytes. */
/* The tail moves are placed at COUNT - SIZE - PROLOG_SIZE relative to the
   already-advanced pointers, i.e. they end exactly at the block's end.  */
25870 destmem = offset_address (destmem, *count, 1);
25871 destmem = offset_address (destmem,
25872 GEN_INT (-size - prolog_size),
25875 emit_move_insn (destmem, mode_value);
25878 srcmem = offset_address (srcmem, *count, 1);
25879 srcmem = offset_address (srcmem,
25880 GEN_INT (-size - prolog_size),
25882 emit_move_insn (destmem, srcmem);
25884 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
25886 destmem = offset_address (destmem, modesize, 1);
25888 emit_move_insn (destmem, mode_value);
25891 srcmem = offset_address (srcmem, modesize, 1);
25892 emit_move_insn (destmem, srcmem);
25896 /* Align destination. */
25897 if (desired_align > 1 && desired_align > align)
25899 rtx saveddest = *destptr;
25901 gcc_assert (desired_align <= size);
25902 /* Align destptr up, place it to new register. */
25903 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
25904 GEN_INT (prolog_size),
25905 NULL_RTX, 1, OPTAB_DIRECT);
/* Preserve the REG_POINTER property lost by expand_simple_binop.  */
25906 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
25907 REG_POINTER (*destptr) = 1;
25908 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
25909 GEN_INT (-desired_align),
25910 *destptr, 1, OPTAB_DIRECT);
25911 /* See how many bytes we skipped. */
25912 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
25914 saveddest, 1, OPTAB_DIRECT);
25915 /* Adjust srcptr and count. */
25917 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
25918 saveddest, *srcptr, 1, OPTAB_DIRECT);
25919 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
25920 saveddest, *count, 1, OPTAB_DIRECT);
25921 /* We copied at most size + prolog_size. */
25922 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
25924 = ROUND_DOWN (*min_size - size, (unsigned HOST_WIDE_INT)size);
25928 /* Our loops always round down the block size, but for dispatch to
25929 library we need precise value. */
25931 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
25932 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
25936 gcc_assert (prolog_size == 0);
25937 /* Decrease count, so we won't end up copying last word twice. */
25938 if (!CONST_INT_P (*count))
25939 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
25940 constm1_rtx, *count, 1, OPTAB_DIRECT);
25942 *count = GEN_INT (ROUND_DOWN (UINTVAL (*count) - 1,
25943 (unsigned HOST_WIDE_INT)size));
25945 *min_size = ROUND_DOWN (*min_size - 1, (unsigned HOST_WIDE_INT)size);
25950 /* This function is like the previous one, except here we know how many bytes
25951 need to be copied. That allows us to update alignment not only of DST, which
25952 is returned, but also of SRC, which is passed as a pointer for that
/* NOTE(review): sampled extract; braces and a few statements are elided.  */
25955 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
25956 rtx srcreg, rtx value, rtx vec_value,
25957 int desired_align, int align_bytes,
25961 rtx orig_dst = dst;
25962 rtx orig_src = NULL;
25963 int piece_size = 1;
25964 int copied_bytes = 0;
25968 gcc_assert (srcp != NULL);
/* Emit one move/set per bit set in ALIGN_BYTES, smallest piece first, until
   the destination reaches DESIRED_ALIGN.  */
25973 for (piece_size = 1;
25974 piece_size <= desired_align && copied_bytes < align_bytes;
25977 if (align_bytes & piece_size)
25981 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
25982 dst = emit_memset (dst, destreg, vec_value, piece_size);
25984 dst = emit_memset (dst, destreg, value, piece_size);
25987 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
25988 copied_bytes += piece_size;
/* Record the new (known) alignment and reduced size of DST.  */
25991 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
25992 set_mem_align (dst, desired_align * BITS_PER_UNIT);
25993 if (MEM_SIZE_KNOWN_P (orig_dst))
25994 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
/* memcpy only: see whether the source happens to become better aligned too,
   since it advanced by the same ALIGN_BYTES.  */
25998 int src_align_bytes = get_mem_align_offset (src, desired_align
26000 if (src_align_bytes >= 0)
26001 src_align_bytes = desired_align - src_align_bytes;
26002 if (src_align_bytes >= 0)
26004 unsigned int src_align;
/* Find the largest power of two on which SRC and DST offsets agree.  */
26005 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
26007 if ((src_align_bytes & (src_align - 1))
26008 == (align_bytes & (src_align - 1)))
26011 if (src_align > (unsigned int) desired_align)
26012 src_align = desired_align;
26013 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
26014 set_mem_align (src, src_align * BITS_PER_UNIT);
26016 if (MEM_SIZE_KNOWN_P (orig_src))
26017 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
26024 /* Return true if ALG can be used in current context.
26025 Assume we expand memset if MEMSET is true. */
26027 alg_usable_p (enum stringop_alg alg, bool memset, bool have_as)
/* no_stringop is never usable (return elided in this extract).  */
26029 if (alg == no_stringop)
/* Vector loop requires at least SSE support.  */
26031 if (alg == vector_loop)
26032 return TARGET_SSE || TARGET_AVX;
26033 /* Algorithms using the rep prefix want at least edi and ecx;
26034 additionally, memset wants eax and memcpy wants esi. Don't
26035 consider such algorithms if the user has appropriated those
26036 registers for their own purposes, or if we have a non-default
26037 address space, since some string insns cannot override the segment. */
26038 if (alg == rep_prefix_1_byte
26039 || alg == rep_prefix_4_byte
26040 || alg == rep_prefix_8_byte)
/* HAVE_AS: a non-generic address space is in use (checked via the elided
   branch, presumably -- TODO confirm).  */
26044 if (fixed_regs[CX_REG]
26045 || fixed_regs[DI_REG]
26046 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]))
26052 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
/* NOTE(review): sampled extract -- several guards/returns are missing lines;
   comments are limited to visible logic.  */
26053 static enum stringop_alg
26054 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
26055 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
26056 bool memset, bool zero_memset, bool have_as,
26057 int *dynamic_check, bool *noalign, bool recur)
26059 const struct stringop_algs *algs;
26060 bool optimize_for_speed;
26062 const struct processor_costs *cost;
26064 bool any_alg_usable_p = false;
26067 *dynamic_check = -1;
26069 /* Even if the string operation call is cold, we still might spend a lot
26070 of time processing large blocks. */
26071 if (optimize_function_for_size_p (cfun)
26072 || (optimize_insn_for_size_p ()
26074 || (expected_size != -1 && expected_size < 256))))
26075 optimize_for_speed = false;
26077 optimize_for_speed = true;
/* Pick the per-CPU cost table: memset vs memcpy, 32- vs 64-bit.  */
26079 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
26081 algs = &cost->memset[TARGET_64BIT != 0];
26083 algs = &cost->memcpy[TARGET_64BIT != 0];
26085 /* See maximal size for user defined algorithm. */
26086 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
26088 enum stringop_alg candidate = algs->size[i].alg;
26089 bool usable = alg_usable_p (candidate, memset, have_as);
26090 any_alg_usable_p |= usable;
26092 if (candidate != libcall && candidate && usable)
26093 max = algs->size[i].max;
26096 /* If expected size is not known but max size is small enough
26097 so inline version is a win, set expected size into
26099 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
26100 && expected_size == -1)
26101 expected_size = min_size / 2 + max_size / 2;
26103 /* If user specified the algorithm, honor it if possible. */
26104 if (ix86_stringop_alg != no_stringop
26105 && alg_usable_p (ix86_stringop_alg, memset, have_as))
26106 return ix86_stringop_alg;
26107 /* rep; movq or rep; movl is the smallest variant. */
26108 else if (!optimize_for_speed)
/* -Os path: prefer rep-prefix; fall back to the plain loop if the rep
   registers are unavailable.  */
26111 if (!count || (count & 3) || (memset && !zero_memset))
26112 return alg_usable_p (rep_prefix_1_byte, memset, have_as)
26113 ? rep_prefix_1_byte : loop_1_byte;
26115 return alg_usable_p (rep_prefix_4_byte, memset, have_as)
26116 ? rep_prefix_4_byte : loop;
26118 /* Very tiny blocks are best handled via the loop, REP is expensive to
26120 else if (expected_size != -1 && expected_size < 4)
26121 return loop_1_byte;
26122 else if (expected_size != -1)
26124 enum stringop_alg alg = libcall;
26125 bool alg_noalign = false;
/* Scan the cost table for the first entry covering EXPECTED_SIZE.  */
26126 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
26128 /* We get here if the algorithms that were not libcall-based
26129 were rep-prefix based and we are unable to use rep prefixes
26130 based on global register usage. Break out of the loop and
26131 use the heuristic below. */
26132 if (algs->size[i].max == 0)
26134 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
26136 enum stringop_alg candidate = algs->size[i].alg;
26138 if (candidate != libcall
26139 && alg_usable_p (candidate, memset, have_as))
26142 alg_noalign = algs->size[i].noalign;
26144 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
26145 last non-libcall inline algorithm. */
26146 if (TARGET_INLINE_ALL_STRINGOPS)
26148 /* When the current size is best to be copied by a libcall,
26149 but we are still forced to inline, run the heuristic below
26150 that will pick code for medium sized blocks. */
26151 if (alg != libcall)
26153 *noalign = alg_noalign;
26156 else if (!any_alg_usable_p)
26159 else if (alg_usable_p (candidate, memset, have_as))
26161 *noalign = algs->size[i].noalign;
26167 /* When asked to inline the call anyway, try to pick meaningful choice.
26168 We look for maximal size of block that is faster to copy by hand and
26169 take blocks of at most of that size guessing that average size will
26170 be roughly half of the block.
26172 If this turns out to be bad, we might simply specify the preferred
26173 choice in ix86_costs. */
26174 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
26175 && (algs->unknown_size == libcall
26176 || !alg_usable_p (algs->unknown_size, memset, have_as)))
26178 enum stringop_alg alg;
26179 HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2;
26181 /* If there aren't any usable algorithms or if recursing already,
26182 then recursing on smaller sizes or same size isn't going to
26183 find anything. Just return the simple byte-at-a-time copy loop. */
26184 if (!any_alg_usable_p || recur)
26186 /* Pick something reasonable. */
26187 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY && !recur)
26188 *dynamic_check = 128;
26189 return loop_1_byte;
/* Recurse once with a synthetic expected size; RECUR=true prevents a
   second level of recursion.  */
26191 alg = decide_alg (count, new_expected_size, min_size, max_size, memset,
26192 zero_memset, have_as, dynamic_check, noalign, true);
26193 gcc_assert (*dynamic_check == -1);
26194 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
26195 *dynamic_check = max;
26197 gcc_assert (alg != libcall);
26200 return (alg_usable_p (algs->unknown_size, memset, have_as)
26201 ? algs->unknown_size : libcall);
26204 /* Decide on alignment. We know that the operand is already aligned to ALIGN
26205 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
26207 decide_alignment (int align,
26208 enum stringop_alg alg,
26210 machine_mode move_mode)
26212 int desired_align = 0;
26214 gcc_assert (alg != no_stringop);
/* Library calls do their own alignment handling.  */
26216 if (alg == libcall)
26218 if (move_mode == VOIDmode)
/* Default: align to the width of the move mode.  */
26221 desired_align = GET_MODE_SIZE (move_mode);
26222 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
26223 copying whole cacheline at once. */
26224 if (TARGET_PENTIUMPRO
26225 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
/* Never request less than the alignment we already have, and don't bother
   aligning blocks expected to be tiny.  */
26230 if (desired_align < align)
26231 desired_align = align;
26232 if (expected_size != -1 && expected_size < 4)
26233 desired_align = align;
26235 return desired_align;
26239 /* Helper function for memcpy. For QImode value 0xXY produce
26240 0xXYXYXYXY of wide specified by MODE. This is essentially
26241 a * 0x10101010, but we can do slightly better than
26242 synth_mult by unwinding the sequence by hand on CPUs with
/* NOTE(review): sampled extract; some branch keywords and returns elided.  */
26245 promote_duplicated_reg (machine_mode mode, rtx val)
26247 machine_mode valmode = GET_MODE (val);
26249 int nops = mode == DImode ? 3 : 2;
26251 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
26252 if (val == const0_rtx)
26253 return copy_to_mode_reg (mode, CONST0_RTX (mode));
/* Constant byte: compute the replicated constant at compile time.  */
26254 if (CONST_INT_P (val))
26256 HOST_WIDE_INT v = INTVAL (val) & 255;
26260 if (mode == DImode)
26261 v |= (v << 16) << 16;
26262 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
26265 if (valmode == VOIDmode)
26267 if (valmode != QImode)
26268 val = gen_lowpart (QImode, val);
26269 if (mode == QImode)
/* On CPUs without partial register stalls, compare the cost of a multiply
   by 0x0101... against the shift/or sequence and pick the cheaper one.  */
26271 if (!TARGET_PARTIAL_REG_STALL)
26273 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
26274 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
26275 <= (ix86_cost->shift_const + ix86_cost->add) * nops
26276 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
26278 rtx reg = convert_modes (mode, QImode, val, true);
26279 tmp = promote_duplicated_reg (mode, const1_rtx);
26280 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
/* Hand-unwound duplication: insv to copy the low byte, then widen by
   shift-and-or in 8/16/32-bit steps.  */
26285 rtx reg = convert_modes (mode, QImode, val, true);
26287 if (!TARGET_PARTIAL_REG_STALL)
26288 if (mode == SImode)
26289 emit_insn (gen_insvsi_1 (reg, reg));
26291 emit_insn (gen_insvdi_1 (reg, reg));
26294 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
26295 NULL, 1, OPTAB_DIRECT);
26297 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
26299 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
26300 NULL, 1, OPTAB_DIRECT);
26301 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
26302 if (mode == SImode)
26304 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
26305 NULL, 1, OPTAB_DIRECT);
26306 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
26311 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
26312 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
26313 alignment from ALIGN to DESIRED_ALIGN. */
26315 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
/* Choose the widest promotion actually required: DImode when either the
   loop chunk or the alignment step exceeds 4 bytes, then SImode, HImode,
   else the raw value.  (A TARGET_64BIT guard is presumably on the elided
   line above the DImode case -- TODO confirm.)  */
26321 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
26322 promoted_val = promote_duplicated_reg (DImode, val);
26323 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
26324 promoted_val = promote_duplicated_reg (SImode, val);
26325 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
26326 promoted_val = promote_duplicated_reg (HImode, val);
26328 promoted_val = val;
26330 return promoted_val;
26333 /* Expand string move (memcpy) ot store (memset) operation. Use i386 string
26334 operations when profitable. The code depends upon architecture, block size
26335 and alignment, but always has one of the following overall structures:
26337 Aligned move sequence:
26339 1) Prologue guard: Conditional that jumps up to epilogues for small
26340 blocks that can be handled by epilogue alone. This is faster
26341 but also needed for correctness, since prologue assume the block
26342 is larger than the desired alignment.
26344 Optional dynamic check for size and libcall for large
26345 blocks is emitted here too, with -minline-stringops-dynamically.
26347 2) Prologue: copy first few bytes in order to get destination
26348 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
26349 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
26350 copied. We emit either a jump tree on power of two sized
26351 blocks, or a byte loop.
26353 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
26354 with specified algorithm.
26356 4) Epilogue: code copying tail of the block that is too small to be
26357 handled by main body (or up to size guarded by prologue guard).
26359 Misaligned move sequence
26361 1) missaligned move prologue/epilogue containing:
26362 a) Prologue handling small memory blocks and jumping to done_label
26363 (skipped if blocks are known to be large enough)
26364 b) Signle move copying first DESIRED_ALIGN-ALIGN bytes if alignment is
26365 needed by single possibly misaligned move
26366 (skipped if alignment is not needed)
26367 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
26369 2) Zero size guard dispatching to done_label, if needed
26371 3) dispatch to library call, if needed,
26373 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
26374 with specified algorithm. */
26376 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
26377 rtx align_exp, rtx expected_align_exp,
26378 rtx expected_size_exp, rtx min_size_exp,
26379 rtx max_size_exp, rtx probable_max_size_exp,
26384 rtx_code_label *label = NULL;
26386 rtx_code_label *jump_around_label = NULL;
26387 HOST_WIDE_INT align = 1;
26388 unsigned HOST_WIDE_INT count = 0;
26389 HOST_WIDE_INT expected_size = -1;
26390 int size_needed = 0, epilogue_size_needed;
26391 int desired_align = 0, align_bytes = 0;
26392 enum stringop_alg alg;
26393 rtx promoted_val = NULL;
26394 rtx vec_promoted_val = NULL;
26395 bool force_loopy_epilogue = false;
26397 bool need_zero_guard = false;
26399 machine_mode move_mode = VOIDmode;
26400 int unroll_factor = 1;
26401 /* TODO: Once value ranges are available, fill in proper data. */
26402 unsigned HOST_WIDE_INT min_size = 0;
26403 unsigned HOST_WIDE_INT max_size = -1;
26404 unsigned HOST_WIDE_INT probable_max_size = -1;
26405 bool misaligned_prologue_used = false;
26408 if (CONST_INT_P (align_exp))
26409 align = INTVAL (align_exp);
26410 /* i386 can do misaligned access on reasonably increased cost. */
26411 if (CONST_INT_P (expected_align_exp)
26412 && INTVAL (expected_align_exp) > align)
26413 align = INTVAL (expected_align_exp);
26414 /* ALIGN is the minimum of destination and source alignment, but we care here
26415 just about destination alignment. */
26417 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
26418 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
26420 if (CONST_INT_P (count_exp))
26422 min_size = max_size = probable_max_size = count = expected_size
26423 = INTVAL (count_exp);
26424 /* When COUNT is 0, there is nothing to do. */
26431 min_size = INTVAL (min_size_exp);
26433 max_size = INTVAL (max_size_exp);
26434 if (probable_max_size_exp)
26435 probable_max_size = INTVAL (probable_max_size_exp);
26436 if (CONST_INT_P (expected_size_exp))
26437 expected_size = INTVAL (expected_size_exp);
26440 /* Make sure we don't need to care about overflow later on. */
26441 if (count > (HOST_WIDE_INT_1U << 30))
26444 have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst));
26446 have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src));
26448 /* Step 0: Decide on preferred algorithm, desired alignment and
26449 size of chunks to be copied by main loop. */
26450 alg = decide_alg (count, expected_size, min_size, probable_max_size,
26452 issetmem && val_exp == const0_rtx, have_as,
26453 &dynamic_check, &noalign, false);
26454 if (alg == libcall)
26456 gcc_assert (alg != no_stringop);
26458 /* For now vector-version of memset is generated only for memory zeroing, as
26459 creating of promoted vector value is very cheap in this case. */
26460 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
26461 alg = unrolled_loop;
26464 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
26465 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
26467 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
26470 move_mode = word_mode;
26476 gcc_unreachable ();
26478 need_zero_guard = true;
26479 move_mode = QImode;
26482 need_zero_guard = true;
26484 case unrolled_loop:
26485 need_zero_guard = true;
26486 unroll_factor = (TARGET_64BIT ? 4 : 2);
26489 need_zero_guard = true;
26491 /* Find the widest supported mode. */
26492 move_mode = word_mode;
26493 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
26494 != CODE_FOR_nothing)
26495 move_mode = GET_MODE_WIDER_MODE (move_mode);
26497 /* Find the corresponding vector mode with the same size as MOVE_MODE.
26498 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
26499 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
26501 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
26502 move_mode = mode_for_vector (word_mode, nunits);
26503 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
26504 move_mode = word_mode;
26506 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
26508 case rep_prefix_8_byte:
26509 move_mode = DImode;
26511 case rep_prefix_4_byte:
26512 move_mode = SImode;
26514 case rep_prefix_1_byte:
26515 move_mode = QImode;
26518 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
26519 epilogue_size_needed = size_needed;
26521 /* If we are going to call any library calls conditionally, make sure any
26522 pending stack adjustment happen before the first conditional branch,
26523 otherwise they will be emitted before the library call only and won't
26524 happen from the other branches. */
26525 if (dynamic_check != -1)
26526 do_pending_stack_adjust ();
26528 desired_align = decide_alignment (align, alg, expected_size, move_mode);
26529 if (!TARGET_ALIGN_STRINGOPS || noalign)
26530 align = desired_align;
26532 /* Step 1: Prologue guard. */
26534 /* Alignment code needs count to be in register. */
26535 if (CONST_INT_P (count_exp) && desired_align > align)
26537 if (INTVAL (count_exp) > desired_align
26538 && INTVAL (count_exp) > size_needed)
26541 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
26542 if (align_bytes <= 0)
26545 align_bytes = desired_align - align_bytes;
26547 if (align_bytes == 0)
26548 count_exp = force_reg (counter_mode (count_exp), count_exp);
26550 gcc_assert (desired_align >= 1 && align >= 1);
26552 /* Misaligned move sequences handle both prologue and epilogue at once.
26553 Default code generation results in a smaller code for large alignments
26554 and also avoids redundant job when sizes are known precisely. */
26555 misaligned_prologue_used
26556 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
26557 && MAX (desired_align, epilogue_size_needed) <= 32
26558 && desired_align <= epilogue_size_needed
26559 && ((desired_align > align && !align_bytes)
26560 || (!count && epilogue_size_needed > 1)));
26562 /* Do the cheap promotion to allow better CSE across the
26563 main loop and epilogue (ie one load of the big constant in the
26565 For now the misaligned move sequences do not have fast path
26566 without broadcasting. */
26567 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
26569 if (alg == vector_loop)
26571 gcc_assert (val_exp == const0_rtx);
26572 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
26573 promoted_val = promote_duplicated_reg_to_size (val_exp,
26574 GET_MODE_SIZE (word_mode),
26575 desired_align, align);
26579 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
26580 desired_align, align);
26583 /* Misaligned move sequences handles both prologues and epilogues at once.
26584 Default code generation results in smaller code for large alignments and
26585 also avoids redundant job when sizes are known precisely. */
26586 if (misaligned_prologue_used)
26588 /* Misaligned move prologue handled small blocks by itself. */
26589 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
26590 (dst, src, &destreg, &srcreg,
26591 move_mode, promoted_val, vec_promoted_val,
26593 &jump_around_label,
26594 desired_align < align
26595 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
26596 desired_align, align, &min_size, dynamic_check, issetmem);
26598 src = change_address (src, BLKmode, srcreg);
26599 dst = change_address (dst, BLKmode, destreg);
26600 set_mem_align (dst, desired_align * BITS_PER_UNIT);
26601 epilogue_size_needed = 0;
26602 if (need_zero_guard
26603 && min_size < (unsigned HOST_WIDE_INT) size_needed)
26605 /* It is possible that we copied enough so the main loop will not
26607 gcc_assert (size_needed > 1);
26608 if (jump_around_label == NULL_RTX)
26609 jump_around_label = gen_label_rtx ();
26610 emit_cmp_and_jump_insns (count_exp,
26611 GEN_INT (size_needed),
26612 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
26613 if (expected_size == -1
26614 || expected_size < (desired_align - align) / 2 + size_needed)
26615 predict_jump (REG_BR_PROB_BASE * 20 / 100);
26617 predict_jump (REG_BR_PROB_BASE * 60 / 100);
26620 /* Ensure that alignment prologue won't copy past end of block. */
26621 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
26623 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
26624 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
26625 Make sure it is power of 2. */
26626 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
26628 /* To improve performance of small blocks, we jump around the VAL
26629 promoting mode. This mean that if the promoted VAL is not constant,
26630 we might not use it in the epilogue and have to use byte
26632 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
26633 force_loopy_epilogue = true;
26634 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
26635 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
26637 /* If main algorithm works on QImode, no epilogue is needed.
26638 For small sizes just don't align anything. */
26639 if (size_needed == 1)
26640 desired_align = align;
26645 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
26647 label = gen_label_rtx ();
26648 emit_cmp_and_jump_insns (count_exp,
26649 GEN_INT (epilogue_size_needed),
26650 LTU, 0, counter_mode (count_exp), 1, label);
26651 if (expected_size == -1 || expected_size < epilogue_size_needed)
26652 predict_jump (REG_BR_PROB_BASE * 60 / 100);
26654 predict_jump (REG_BR_PROB_BASE * 20 / 100);
26658 /* Emit code to decide on runtime whether library call or inline should be
26660 if (dynamic_check != -1)
26662 if (!issetmem && CONST_INT_P (count_exp))
26664 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
26666 emit_block_move_via_libcall (dst, src, count_exp, false);
26667 count_exp = const0_rtx;
26673 rtx_code_label *hot_label = gen_label_rtx ();
26674 if (jump_around_label == NULL_RTX)
26675 jump_around_label = gen_label_rtx ();
26676 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
26677 LEU, 0, counter_mode (count_exp),
26679 predict_jump (REG_BR_PROB_BASE * 90 / 100);
26681 set_storage_via_libcall (dst, count_exp, val_exp, false);
26683 emit_block_move_via_libcall (dst, src, count_exp, false);
26684 emit_jump (jump_around_label);
26685 emit_label (hot_label);
26689 /* Step 2: Alignment prologue. */
26690 /* Do the expensive promotion once we branched off the small blocks. */
26691 if (issetmem && !promoted_val)
26692 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
26693 desired_align, align);
26695 if (desired_align > align && !misaligned_prologue_used)
26697 if (align_bytes == 0)
26699 /* Except for the first move in prologue, we no longer know
26700 constant offset in aliasing info. It don't seems to worth
26701 the pain to maintain it for the first move, so throw away
26703 dst = change_address (dst, BLKmode, destreg);
26705 src = change_address (src, BLKmode, srcreg);
26706 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
26707 promoted_val, vec_promoted_val,
26708 count_exp, align, desired_align,
26710 /* At most desired_align - align bytes are copied. */
26711 if (min_size < (unsigned)(desired_align - align))
26714 min_size -= desired_align - align;
26718 /* If we know how many bytes need to be stored before dst is
26719 sufficiently aligned, maintain aliasing info accurately. */
26720 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
26728 count_exp = plus_constant (counter_mode (count_exp),
26729 count_exp, -align_bytes);
26730 count -= align_bytes;
26731 min_size -= align_bytes;
26732 max_size -= align_bytes;
26734 if (need_zero_guard
26735 && min_size < (unsigned HOST_WIDE_INT) size_needed
26736 && (count < (unsigned HOST_WIDE_INT) size_needed
26737 || (align_bytes == 0
26738 && count < ((unsigned HOST_WIDE_INT) size_needed
26739 + desired_align - align))))
26741 /* It is possible that we copied enough so the main loop will not
26743 gcc_assert (size_needed > 1);
26744 if (label == NULL_RTX)
26745 label = gen_label_rtx ();
26746 emit_cmp_and_jump_insns (count_exp,
26747 GEN_INT (size_needed),
26748 LTU, 0, counter_mode (count_exp), 1, label);
26749 if (expected_size == -1
26750 || expected_size < (desired_align - align) / 2 + size_needed)
26751 predict_jump (REG_BR_PROB_BASE * 20 / 100);
26753 predict_jump (REG_BR_PROB_BASE * 60 / 100);
26756 if (label && size_needed == 1)
26758 emit_label (label);
26759 LABEL_NUSES (label) = 1;
26761 epilogue_size_needed = 1;
26763 promoted_val = val_exp;
26765 else if (label == NULL_RTX && !misaligned_prologue_used)
26766 epilogue_size_needed = size_needed;
26768 /* Step 3: Main loop. */
26775 gcc_unreachable ();
26778 case unrolled_loop:
26779 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
26780 count_exp, move_mode, unroll_factor,
26781 expected_size, issetmem);
26784 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
26785 vec_promoted_val, count_exp, move_mode,
26786 unroll_factor, expected_size, issetmem);
26788 case rep_prefix_8_byte:
26789 case rep_prefix_4_byte:
26790 case rep_prefix_1_byte:
26791 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
26792 val_exp, count_exp, move_mode, issetmem);
26795 /* Adjust properly the offset of src and dest memory for aliasing. */
26796 if (CONST_INT_P (count_exp))
26799 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
26800 (count / size_needed) * size_needed);
26801 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
26802 (count / size_needed) * size_needed);
26807 src = change_address (src, BLKmode, srcreg);
26808 dst = change_address (dst, BLKmode, destreg);
26811 /* Step 4: Epilogue to copy the remaining bytes. */
26815 /* When the main loop is done, COUNT_EXP might hold original count,
26816 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
26817 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
26818 bytes. Compensate if needed. */
26820 if (size_needed < epilogue_size_needed)
26823 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
26824 GEN_INT (size_needed - 1), count_exp, 1,
26826 if (tmp != count_exp)
26827 emit_move_insn (count_exp, tmp);
26829 emit_label (label);
26830 LABEL_NUSES (label) = 1;
26833 if (count_exp != const0_rtx && epilogue_size_needed > 1)
26835 if (force_loopy_epilogue)
26836 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
26837 epilogue_size_needed);
26841 expand_setmem_epilogue (dst, destreg, promoted_val,
26842 vec_promoted_val, count_exp,
26843 epilogue_size_needed);
26845 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
26846 epilogue_size_needed);
26849 if (jump_around_label)
26850 emit_label (jump_around_label);
26855 /* Expand the appropriate insns for doing strlen if not just doing
26858    out = result, initialized with the start address
26859 align_rtx = alignment of the address.
26860 scratch = scratch register, initialized with the startaddress when
26861 not aligned, otherwise undefined
26863 This is just the body. It needs the initializations mentioned above and
26864 some address computing at the end. These things are done in i386.md. */
/* NOTE(review): this listing is line-sampled; braces, declarations and some
   statements between the numbered lines are omitted from this view.  */
26867 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
26871 rtx_code_label *align_2_label = NULL;
26872 rtx_code_label *align_3_label = NULL;
26873 rtx_code_label *align_4_label = gen_label_rtx ();
26874 rtx_code_label *end_0_label = gen_label_rtx ();
26876 rtx tmpreg = gen_reg_rtx (SImode);
26877 rtx scratch = gen_reg_rtx (SImode);
/* Known compile-time alignment, if ALIGN_RTX is a constant.  */
26881 if (CONST_INT_P (align_rtx))
26882 align = INTVAL (align_rtx);
26884 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
26886 /* Is there a known alignment and is it less than 4? */
26889 rtx scratch1 = gen_reg_rtx (Pmode);
26890 emit_move_insn (scratch1, out);
26891 /* Is there a known alignment and is it not 2? */
26894 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
26895 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
26897 /* Leave just the 3 lower bits. */
26898 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
26899 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (address & 3): 0 -> already aligned, 2 -> two bytes to
   check, >2 -> three bytes to check.  */
26901 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
26902 Pmode, 1, align_4_label);
26903 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
26904 Pmode, 1, align_2_label);
26905 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
26906 Pmode, 1, align_3_label);
26910 /* Since the alignment is 2, we have to check 2 or 0 bytes;
26911 check if is aligned to 4 - byte. */
26913 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
26914 NULL_RTX, 0, OPTAB_WIDEN);
26916 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
26917 Pmode, 1, align_4_label);
26920 mem = change_address (src, QImode, out);
26922 /* Now compare the bytes. */
26924 /* Compare the first n unaligned byte on a byte per byte basis. */
26925 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
26926 QImode, 1, end_0_label);
26928 /* Increment the address. */
26929 emit_insn (ix86_gen_add3 (out, out, const1_rtx))
26931 /* Not needed with an alignment of 2 */
26934 emit_label (align_2_label);
26936 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
26939 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
26941 emit_label (align_3_label);
26944 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
26947 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
26950 /* Generate loop to check 4 bytes at a time. It is not a good idea to
26951 align this loop. It gives only huge programs, but does not help to
26953 emit_label (align_4_label);
26955 mem = change_address (src, SImode, out);
26956 emit_move_insn (scratch, mem);
26957 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
26959 /* This formula yields a nonzero result iff one of the bytes is zero.
26960 This saves three branches inside loop and many cycles. */
/* Classic "has zero byte" trick: (word - 0x01010101) & ~word & 0x80808080
   is nonzero iff some byte of WORD is zero.  */
26962 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
26963 emit_insn (gen_one_cmplsi2 (scratch, scratch));
26964 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
26965 emit_insn (gen_andsi3 (tmpreg, tmpreg,
26966 gen_int_mode (0x80808080, SImode)));
26967 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
26972 rtx reg = gen_reg_rtx (SImode);
26973 rtx reg2 = gen_reg_rtx (Pmode);
26974 emit_move_insn (reg, tmpreg);
26975 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
26977 /* If zero is not in the first two bytes, move two bytes forward. */
26978 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
26979 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
26980 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
/* Branch-free selection: conditionally replace TMPREG with its upper
   half and advance OUT by 2, driven by the flags from the test above.  */
26981 emit_insn (gen_rtx_SET (tmpreg,
26982 gen_rtx_IF_THEN_ELSE (SImode, tmp,
26985 /* Emit lea manually to avoid clobbering of flags. */
26986 emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx)));
26988 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
26989 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
26990 emit_insn (gen_rtx_SET (out,
26991 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
26997 rtx_code_label *end_2_label = gen_label_rtx ();
26998 /* Is zero in the first two bytes? */
27000 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
27001 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
27002 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
27003 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
27004 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
27006 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
27007 JUMP_LABEL (tmp) = end_2_label;
27009 /* Not in the first two. Move two bytes forward. */
27010 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
27011 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
27013 emit_label (end_2_label);
27017 /* Avoid branch in fixing the byte. */
/* OUT overshot by up to 3 bytes; use the carry from the byte test to
   subtract 3-with-borrow without a conditional branch.  */
27018 tmpreg = gen_lowpart (QImode, tmpreg);
27019 emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg));
27020 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
27021 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
27022 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
27024 emit_label (end_0_label);
27027 /* Expand strlen. */
/* Two strategies are visible below: an unrolled inline scan via
   ix86_expand_strlensi_unroll_1, and a scasb-based sequence built around
   UNSPEC_SCAS.  EOSCHAR is the end-of-string character; ALIGN the known
   alignment of SRC.  */
27030 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
27032 rtx addr, scratch1, scratch2, scratch3, scratch4;
27034 /* The generic case of strlen expander is long. Avoid it's
27035 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
27037 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
27038 && !TARGET_INLINE_ALL_STRINGOPS
27039 && !optimize_insn_for_size_p ()
27040 && (!CONST_INT_P (align) || INTVAL (align) < 4))
27043 addr = force_reg (Pmode, XEXP (src, 0));
27044 scratch1 = gen_reg_rtx (Pmode);
27046 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
27047 && !optimize_insn_for_size_p ())
27049 /* Well it seems that some optimizer does not combine a call like
27050 foo(strlen(bar), strlen(bar));
27051 when the move and the subtraction is done here. It does calculate
27052 the length just once when these instructions are done inside of
27053 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
27054 often used and I use one fewer register for the lifetime of
27055 output_strlen_unroll() this is better. */
27057 emit_move_insn (out, addr);
27059 ix86_expand_strlensi_unroll_1 (out, src, align);
27061 /* strlensi_unroll_1 returns the address of the zero at the end of
27062 the string, like memchr(), so compute the length by subtracting
27063 the start address. */
27064 emit_insn (ix86_gen_sub3 (out, out, addr));
27070 /* Can't use this if the user has appropriated eax, ecx, or edi. */
27071 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
27073 /* Can't use this for non-default address spaces. */
27074 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)))
27077 scratch2 = gen_reg_rtx (Pmode);
27078 scratch3 = gen_reg_rtx (Pmode);
/* scratch4 = -1: the maximal repeat count for the scas sequence.  */
27079 scratch4 = force_reg (Pmode, constm1_rtx);
27081 emit_move_insn (scratch3, addr);
27082 eoschar = force_reg (QImode, eoschar);
27084 src = replace_equiv_address_nv (src, scratch3);
27086 /* If .md starts supporting :P, this can be done in .md. */
27087 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
27088 scratch4), UNSPEC_SCAS);
27089 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* scas leaves a negated count; recover the length as ~scratch1 - 1.  */
27090 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
27091 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
27096 /* For given symbol (function) construct code to compute address of it's PLT
27097 entry in large x86-64 PIC model. */
/* The PLT-offset of SYMBOL is materialized as an UNSPEC_PLTOFF constant
   and added to the PIC register; only valid in the large PIC code model
   on non-PE-COFF 64-bit targets (enforced by the asserts below).  */
27099 construct_plt_address (rtx symbol)
27103 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
27104 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
27105 gcc_assert (Pmode == DImode);
27107 tmp = gen_reg_rtx (Pmode);
27108 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
27110 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
27111 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
/* Expand a call.  RETVAL is the value-receiving rtx (or null), FNADDR the
   MEM holding the callee address, CALLARG1/CALLARG2 argument info, POP the
   callee-pop amount, SIBCALL whether this is a sibling call.
   NOTE(review): this listing is line-sampled; some conditions and braces
   between the numbered lines are omitted from this view.  */
27116 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
27118 rtx pop, bool sibcall)
27121 rtx use = NULL, call;
27122 unsigned int vec_len = 0;
27124 if (pop == const0_rtx)
/* Callee-pop is a 32-bit-only convention.  */
27126 gcc_assert (!TARGET_64BIT || !pop);
27128 if (TARGET_MACHO && !TARGET_64BIT)
27131 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
27132 fnaddr = machopic_indirect_call_target (fnaddr);
27137 /* Static functions and indirect calls don't need the pic register. Also,
27138 check if PLT was explicitly avoided via no-plt or "noplt" attribute, making
27139 it an indirect call. */
27140 rtx addr = XEXP (fnaddr, 0);
27142 && GET_CODE (addr) == SYMBOL_REF
27143 && !SYMBOL_REF_LOCAL_P (addr))
27146 && (SYMBOL_REF_DECL (addr) == NULL_TREE
27147 || !lookup_attribute ("noplt",
27148 DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr)))))
27151 || (ix86_cmodel == CM_LARGE_PIC
27152 && DEFAULT_ABI != MS_ABI))
/* Record the hard PIC register as used by the call.  */
27154 use_reg (&use, gen_rtx_REG (Pmode,
27155 REAL_PIC_OFFSET_TABLE_REGNUM));
27156 if (ix86_use_pseudo_pic_reg ())
/* Copy the pseudo PIC register into the hard one for the call.  */
27157 emit_move_insn (gen_rtx_REG (Pmode,
27158 REAL_PIC_OFFSET_TABLE_REGNUM),
27159 pic_offset_table_rtx);
27162 else if (!TARGET_PECOFF && !TARGET_MACHO)
/* Build a GOT-based indirect callee address (no-PLT path).  */
27166 fnaddr = gen_rtx_UNSPEC (Pmode,
27167 gen_rtvec (1, addr),
27169 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
27173 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
27175 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
27176 fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
27179 fnaddr = gen_const_mem (Pmode, fnaddr);
27180 /* Pmode may not be the same as word_mode for x32, which
27181 doesn't support indirect branch via 32-bit memory slot.
27182 Since x32 GOT slot is 64 bit with zero upper 32 bits,
27183 indirect branch via x32 GOT slot is OK. */
27184 if (GET_MODE (fnaddr) != word_mode)
27185 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
27186 fnaddr = gen_rtx_MEM (QImode, fnaddr);
27191 /* Skip setting up RAX register for -mskip-rax-setup when there are no
27192 parameters passed in vector registers. */
27194 && (INTVAL (callarg2) > 0
27195 || (INTVAL (callarg2) == 0
27196 && (TARGET_SSE || !flag_skip_rax_setup))))
/* AL carries the number of vector registers used by a varargs call.  */
27198 rtx al = gen_rtx_REG (QImode, AX_REG);
27199 emit_move_insn (al, callarg2);
27200 use_reg (&use, al);
27203 if (ix86_cmodel == CM_LARGE_PIC
27206 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
27207 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
27208 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
27209 /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect
27210 branch via x32 GOT slot is OK. */
27211 else if (!(TARGET_X32
27213 && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
27214 && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
27216 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
27217 : !call_insn_operand (XEXP (fnaddr, 0), word_mode)))
/* Address does not satisfy the insn predicate: force it into a reg.  */
27219 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
27220 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
27223 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
27227 /* We should add bounds as destination register in case
27228 pointer with bounds may be returned. */
27229 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
27231 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
27232 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
27233 if (GET_CODE (retval) == PARALLEL)
27235 b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
27236 b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
27237 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
27238 retval = chkp_join_splitted_slot (retval, par);
27242 retval = gen_rtx_PARALLEL (VOIDmode,
27243 gen_rtvec (3, retval, b0, b1));
27244 chkp_put_regs_to_expr_list (retval);
27248 call = gen_rtx_SET (retval, call);
27250 vec[vec_len++] = call;
/* Callee-pop: model the stack-pointer adjustment in the call pattern.  */
27254 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
27255 pop = gen_rtx_SET (stack_pointer_rtx, pop);
27256 vec[vec_len++] = pop;
27259 if (TARGET_64BIT_MS_ABI
27260 && (!callarg2 || INTVAL (callarg2) != -2))
/* MS ABI callees may clobber registers that are call-saved in the
   SysV ABI; record them as clobbers on the call.  */
27262 int const cregs_size
27263 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
27266 for (i = 0; i < cregs_size; i++)
27268 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
27269 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
27271 clobber_reg (&use, gen_rtx_REG (mode, regno));
27276 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
27277 call = emit_call_insn (call);
27279 CALL_INSN_FUNCTION_USAGE (call) = use;
27284 /* Return true if the function being called was marked with attribute "noplt"
27285 or using -fno-plt and we are compiling for non-PIC and x86_64. We need to
27286 handle the non-PIC case in the backend because there is no easy interface
27287 for the front-end to force non-PLT calls to use the GOT. This is currently
27288 used only with 64-bit ELF targets to call the function marked "noplt"
27292 ix86_nopic_noplt_attribute_p (rtx call_op)
/* Bail out in every configuration where the GOT-call rewrite does not
   apply: PIC, large code model, 32-bit, Mach-O, SEH, PE-COFF, or a
   symbol known to bind locally.  */
27294 if (flag_pic || ix86_cmodel == CM_LARGE
27295 || !TARGET_64BIT || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
27296 || SYMBOL_REF_LOCAL_P (call_op))
27299 tree symbol_decl = SYMBOL_REF_DECL (call_op);
/* Either -fno-plt is in force (condition on an omitted line above —
   TODO confirm) or the decl carries the "noplt" attribute.  */
27302 || (symbol_decl != NULL_TREE
27303 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
27309 /* Output the assembly for a call instruction. */
/* Picks a jmp template for sibling calls or a call template otherwise;
   "noplt" callees go through an explicit @GOTPCREL memory operand.
   Templates use the {att|intel} dual-dialect syntax.  */
27312 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
27314 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
27315 bool seh_nop_p = false;
27318 if (SIBLING_CALL_P (insn))
27322 if (ix86_nopic_noplt_attribute_p (call_op))
27323 xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
27325 xasm = "%!jmp\t%P0";
27327 /* SEH epilogue detection requires the indirect branch case
27328 to include REX.W. */
27329 else if (TARGET_SEH)
27330 xasm = "%!rex.W jmp\t%A0";
27332 xasm = "%!jmp\t%A0";
27334 output_asm_insn (xasm, &call_op);
27338 /* SEH unwinding can require an extra nop to be emitted in several
27339 circumstances. Determine if we have one of those. */
/* Scan forward from the call to decide whether a trailing nop is
   needed for Windows SEH unwind correctness.  */
27344 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
27346 /* If we get to another real insn, we don't need the nop. */
27350 /* If we get to the epilogue note, prevent a catch region from
27351 being adjacent to the standard epilogue sequence. If non-
27352 call-exceptions, we'll have done this during epilogue emission. */
27353 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
27354 && !flag_non_call_exceptions
27355 && !can_throw_internal (insn))
27362 /* If we didn't find a real insn following the call, prevent the
27363 unwinder from looking into the next function. */
27370 if (ix86_nopic_noplt_attribute_p (call_op))
27371 xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
27373 xasm = "%!call\t%P0";
27376 xasm = "%!call\t%A0";
27378 output_asm_insn (xasm, &call_op);
27386 /* Clear stack slot assignments remembered from previous functions.
27387 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocates a zeroed, GC-managed machine_function for the function being
   compiled; -1 marks use_fast_prologue_epilogue_nregs as not-yet-computed.  */
27390 static struct machine_function *
27391 ix86_init_machine_status (void)
27393 struct machine_function *f;
27395 f = ggc_cleared_alloc<machine_function> ();
27396 f->use_fast_prologue_epilogue_nregs = -1;
27397 f->call_abi = ix86_abi;
27402 /* Return a MEM corresponding to a stack slot with mode MODE.
27403 Allocate a new slot if necessary.
27405 The RTL for a function can have several slots available: N is
27406 which slot to use. */
27409 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
27411 struct stack_local_entry *s;
27413 gcc_assert (n < MAX_386_STACK_LOCALS);
/* Reuse a previously allocated (mode, n) slot if one exists; the MEM is
   copied so callers may modify it independently.  */
27415 for (s = ix86_stack_locals; s; s = s->next)
27416 if (s->mode == mode && s->n == n)
27417 return validize_mem (copy_rtx (s->rtl));
/* No cached slot: allocate one and push it on the per-function list.  */
27419 s = ggc_alloc<stack_local_entry> ();
27422 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
27424 s->next = ix86_stack_locals;
27425 ix86_stack_locals = s;
27426 return validize_mem (copy_rtx (s->rtl));
/* Instantiate virtual registers in the RTL of every remembered stack
   slot, so later passes see concrete frame addresses.  */
27430 ix86_instantiate_decls (void)
27432 struct stack_local_entry *s;
27434 for (s = ix86_stack_locals; s; s = s->next)
27435 if (s->rtl != NULL_RTX)
27436 instantiate_decl_rtl (s->rtl);
27439 /* Return the number used for encoding REG, in the range 0..7. */
/* Maps a hard register number to its 3-bit field value within its own
   register file by subtracting the file's first regno.  Used when
   reconstructing ModR/M bytes (see ix86_get_modrm_for_rop).  */
27442 reg_encoded_number (rtx reg)
27444 unsigned regno = REGNO (reg);
27466 if (IN_RANGE (regno, FIRST_STACK_REG, LAST_STACK_REG))
27467 return regno - FIRST_STACK_REG;
27468 if (IN_RANGE (regno, FIRST_SSE_REG, LAST_SSE_REG))
27469 return regno - FIRST_SSE_REG;
27470 if (IN_RANGE (regno, FIRST_MMX_REG, LAST_MMX_REG))
27471 return regno - FIRST_MMX_REG;
27472 if (IN_RANGE (regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG))
27473 return regno - FIRST_REX_SSE_REG;
27474 if (IN_RANGE (regno, FIRST_REX_INT_REG, LAST_REX_INT_REG))
27475 return regno - FIRST_REX_INT_REG;
27476 if (IN_RANGE (regno, FIRST_MASK_REG, LAST_MASK_REG))
27477 return regno - FIRST_MASK_REG;
27478 if (IN_RANGE (regno, FIRST_BND_REG, LAST_BND_REG))
27479 return regno - FIRST_BND_REG;
27483 /* Given an insn INSN with NOPERANDS OPERANDS, return the modr/m byte used
27484 in its encoding if it could be relevant for ROP mitigation, otherwise
27485 return -1. If POPNO0 and POPNO1 are nonnull, store the operand numbers
27486 used for calculating it into them. */
27489 ix86_get_modrm_for_rop (rtx_insn *insn, rtx *operands, int noperands,
27490 int *popno0 = 0, int *popno1 = 0)
/* Inline asm has no machine-computed modrm attribute; give up.  */
27492 if (asm_noperands (PATTERN (insn)) >= 0)
27494 int has_modrm = get_attr_modrm (insn);
27497 enum attr_modrm_class cls = get_attr_modrm_class (insn);
/* The modrm_class attribute says which operand pair feeds the byte.  */
27501 case MODRM_CLASS_OP02:
27502 gcc_assert (noperands >= 3);
27511 case MODRM_CLASS_OP01:
27512 gcc_assert (noperands >= 2);
/* Register-register form: mod=11, reg=enc1, r/m=enc0.  */
27524 if (REG_P (op0) && REG_P (op1))
27526 int enc0 = reg_encoded_number (op0);
27527 int enc1 = reg_encoded_number (op1);
27528 return 0xc0 + (enc1 << 3) + enc0;
27533 /* Check whether x86 address PARTS is a pc-relative address. */
/* True only for a displacement-only address (no base, no index) whose
   displacement is a label, a non-TLS symbol, or one of the PC-relative
   UNSPECs.  */
27536 rip_relative_addr_p (struct ix86_address *parts)
27538 rtx base, index, disp;
27540 base = parts->base;
27541 index = parts->index;
27542 disp = parts->disp;
27544 if (disp && !base && !index)
/* Strip a CONST wrapper and any constant offset to reach the symbol.  */
27550 if (GET_CODE (disp) == CONST)
27551 symbol = XEXP (disp, 0);
27552 if (GET_CODE (symbol) == PLUS
27553 && CONST_INT_P (XEXP (symbol, 1)))
27554 symbol = XEXP (symbol, 0);
27556 if (GET_CODE (symbol) == LABEL_REF
27557 || (GET_CODE (symbol) == SYMBOL_REF
27558 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
27559 || (GET_CODE (symbol) == UNSPEC
27560 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
27561 || XINT (symbol, 1) == UNSPEC_PCREL
27562 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
27569 /* Calculate the length of the memory address in the instruction encoding.
27570 Includes addr32 prefix, does not include the one-byte modrm, opcode,
27571 or other prefixes. We never generate addr32 prefix for LEA insn. */
27574 memory_address_length (rtx addr, bool lea)
27576 struct ix86_address parts;
27577 rtx base, index, disp;
/* Auto-inc/dec forms encode no explicit address bytes beyond the insn.  */
27581 if (GET_CODE (addr) == PRE_DEC
27582 || GET_CODE (addr) == POST_INC
27583 || GET_CODE (addr) == PRE_MODIFY
27584 || GET_CODE (addr) == POST_MODIFY)
27587 ok = ix86_decompose_address (addr, &parts);
/* One byte for a segment-override prefix on non-generic addr spaces.  */
27590 len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
27592 /* If this is not LEA instruction, add the length of addr32 prefix. */
27593 if (TARGET_64BIT && !lea
27594 && (SImode_address_operand (addr, VOIDmode)
27595 || (parts.base && GET_MODE (parts.base) == SImode)
27596 || (parts.index && GET_MODE (parts.index) == SImode)))
27600 index = parts.index;
/* Look through SUBREGs so the REGNO checks below see hard registers.  */
27603 if (base && SUBREG_P (base))
27604 base = SUBREG_REG (base);
27605 if (index && SUBREG_P (index))
27606 index = SUBREG_REG (index);
27608 gcc_assert (base == NULL_RTX || REG_P (base));
27609 gcc_assert (index == NULL_RTX || REG_P (index));
27612 - esp as the base always wants an index,
27613 - ebp as the base always wants a displacement,
27614 - r12 as the base always wants an index,
27615 - r13 as the base always wants a displacement. */
27617 /* Register Indirect. */
27618 if (base && !index && !disp)
27620 /* esp (for its index) and ebp (for its displacement) need
27621 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
27623 if (base == arg_pointer_rtx
27624 || base == frame_pointer_rtx
27625 || REGNO (base) == SP_REG
27626 || REGNO (base) == BP_REG
27627 || REGNO (base) == R12_REG
27628 || REGNO (base) == R13_REG)
27632 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
27633 is not disp32, but disp32(%rip), so for disp32
27634 SIB byte is needed, unless print_operand_address
27635 optimizes it into disp32(%rip) or (%rip) is implied
27637 else if (disp && !base && !index)
27640 if (rip_relative_addr_p (&parts))
27645 /* Find the length of the displacement constant. */
/* Constraint K is a signed 8-bit constant: disp8 form is enough.  */
27648 if (base && satisfies_constraint_K (disp))
27653 /* ebp always wants a displacement. Similarly r13. */
27654 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
27657 /* An index requires the two-byte modrm form.... */
27659 /* ...like esp (or r12), which always wants an index. */
27660 || base == arg_pointer_rtx
27661 || base == frame_pointer_rtx
27662 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
27669 /* Compute default value for "length_immediate" attribute. When SHORTFORM
27670 is set, expect that insn have 8bit immediate alternative. */
27672 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
27676 extract_insn_cached (insn);
/* Scan operands for a constant; its encoded width depends on the insn's
   "mode" attribute.  */
27677 for (i = recog_data.n_operands - 1; i >= 0; --i)
27678 if (CONSTANT_P (recog_data.operand[i]))
27680 enum attr_mode mode = get_attr_mode (insn);
27683 if (shortform && CONST_INT_P (recog_data.operand[i]))
27685 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
/* Truncate to the operand mode before the signed-8-bit range test,
   so e.g. 0xffff in HImode counts as -1.  */
27692 ival = trunc_int_for_mode (ival, HImode);
27695 ival = trunc_int_for_mode (ival, SImode);
/* Fits in imm8: the short-form alternative applies.  */
27700 if (IN_RANGE (ival, -128, 127))
27717 /* Immediates for DImode instructions are encoded
27718 as 32bit sign extended values. */
27723 fatal_insn ("unknown insn mode", insn);
27729 /* Compute default value for "length_address" attribute. */
27731 ix86_attr_length_address_default (rtx_insn *insn)
/* LEA encodes its source address directly; measure it without the
   addr32-prefix accounting (second argument true).  */
27735 if (get_attr_type (insn) == TYPE_LEA)
27737 rtx set = PATTERN (insn), addr;
27739 if (GET_CODE (set) == PARALLEL)
27740 set = XVECEXP (set, 0, 0);
27742 gcc_assert (GET_CODE (set) == SET);
27744 addr = SET_SRC (set);
27746 return memory_address_length (addr, true);
27749 extract_insn_cached (insn);
/* Otherwise find the (matched-alternative) memory operand and measure
   its address bytes.  */
27750 for (i = recog_data.n_operands - 1; i >= 0; --i)
27752 rtx op = recog_data.operand[i];
27755 constrain_operands_cached (insn, reload_completed);
27756 if (which_alternative != -1)
27758 const char *constraints = recog_data.constraints[i];
27759 int alt = which_alternative;
/* Walk the constraint string to the chosen alternative.  */
27761 while (*constraints == '=' || *constraints == '+')
27764 while (*constraints++ != ',')
27766 /* Skip ignored operands. */
27767 if (*constraints == 'X')
27771 int len = memory_address_length (XEXP (op, 0), false);
27773 /* Account for segment prefix for non-default addr spaces. */
27774 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
27783 /* Compute default value for "length_vex" attribute. It includes
27784 2 or 3 byte VEX prefix and 1 opcode byte. */
27787 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
27792 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
27793 byte VEX prefix. */
27794 if (!has_0f_opcode || has_vex_w)
27797 /* We can always use 2 byte VEX prefix in 32bit. */
27801 extract_insn_cached (insn);
/* In 64-bit mode any operand needing a REX.W/X/B equivalent forces
   the 3-byte VEX form.  */
27803 for (i = recog_data.n_operands - 1; i >= 0; --i)
27804 if (REG_P (recog_data.operand[i]))
27806 /* REX.W bit uses 3 byte VEX prefix. */
27807 if (GET_MODE (recog_data.operand[i]) == DImode
27808 && GENERAL_REG_P (recog_data.operand[i]))
27813 /* REX.X or REX.B bits use 3 byte VEX prefix. */
27814 if (MEM_P (recog_data.operand[i])
27815 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
27822 /* Return the maximum number of instructions a cpu can issue. */
/* Switch over the active tuning target; processors with the same issue
   width share a case group.  NOTE(review): the per-group return values
   fall on lines omitted from this sampled listing — confirm against the
   full source.  */
27825 ix86_issue_rate (void)
27829 case PROCESSOR_PENTIUM:
27830 case PROCESSOR_LAKEMONT:
27831 case PROCESSOR_BONNELL:
27832 case PROCESSOR_SILVERMONT:
27833 case PROCESSOR_KNL:
27834 case PROCESSOR_INTEL:
27836 case PROCESSOR_BTVER2:
27837 case PROCESSOR_PENTIUM4:
27838 case PROCESSOR_NOCONA:
27841 case PROCESSOR_PENTIUMPRO:
27842 case PROCESSOR_ATHLON:
27844 case PROCESSOR_AMDFAM10:
27845 case PROCESSOR_GENERIC:
27846 case PROCESSOR_BTVER1:
27849 case PROCESSOR_BDVER1:
27850 case PROCESSOR_BDVER2:
27851 case PROCESSOR_BDVER3:
27852 case PROCESSOR_BDVER4:
27853 case PROCESSOR_ZNVER1:
27854 case PROCESSOR_CORE2:
27855 case PROCESSOR_NEHALEM:
27856 case PROCESSOR_SANDYBRIDGE:
27857 case PROCESSOR_HASWELL:
27865 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
27866 by DEP_INSN and nothing set by DEP_INSN. */
27869 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
27873 /* Simplify the test for uninteresting insns. */
27874 if (insn_type != TYPE_SETCC
27875 && insn_type != TYPE_ICMOV
27876 && insn_type != TYPE_FCMOV
27877 && insn_type != TYPE_IBR)
27880 if ((set = single_set (dep_insn)) != 0)
27882 set = SET_DEST (set);
27885 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
27886 && XVECLEN (PATTERN (dep_insn), 0) == 2
27887 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
27888 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
27890 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
27891 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
27896 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
27899 /* This test is true if the dependent insn reads the flags but
27900 not any other potentially set register. */
27901 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
27904 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
27910 /* Return true iff USE_INSN has a memory address with operands set by
/* Scans USE_INSN's recognized operands; for the first MEM found, reports
   whether its address is modified by SET_INSN (address generation
   interlock).  NOTE(review): elided listing; the function tail is on
   missing lines.  */
27914 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
27917 extract_insn_cached (use_insn);
27918 for (i = recog_data.n_operands - 1; i >= 0; --i)
27919 if (MEM_P (recog_data.operand[i]))
27921 rtx addr = XEXP (recog_data.operand[i], 0);
27922 return modified_in_p (addr, set_insn) != 0;
27927 /* Helper function for exact_store_load_dependency.
27928 Return true if addr is found in insn. */
/* Recursive RTX walk: succeeds on rtx_equal_p match, otherwise recurses
   into 'e' (expression) and 'E'/'V' (vector) operands per the rtx format
   string.  */
27930 exact_dependency_1 (rtx addr, rtx insn)
27932 enum rtx_code code;
27933 const char *format_ptr;
27936 code = GET_CODE (insn);
27940 if (rtx_equal_p (addr, insn))
27955 format_ptr = GET_RTX_FORMAT (code);
27956 for (i = 0; i < GET_RTX_LENGTH (code); i++)
27958 switch (*format_ptr++)
27961 if (exact_dependency_1 (addr, XEXP (insn, i)))
27965 for (j = 0; j < XVECLEN (insn, i); j++)
27966 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
27974 /* Return true if there exists exact dependency for store & load, i.e.
27975 the same memory address is used in them. */
/* Requires STORE to be a single_set with a MEM destination and LOAD to be
   a single_set; then searches LOAD's source for the stored MEM.  */
27977 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
27981 set1 = single_set (store);
27984 if (!MEM_P (SET_DEST (set1)))
27986 set2 = single_set (load);
27989 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
/* TARGET_SCHED_ADJUST_COST hook: tweak the scheduler's latency COST of the
   dependency LINK between DEP_INSN (producer) and INSN (consumer) for the
   current -mtune processor.  NOTE(review): elided listing -- the switch on
   the tuned processor and several return/adjustment statements sit on
   missing lines; comments here describe only the visible code.  */
27995 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
27997 enum attr_type insn_type, dep_insn_type;
27998 enum attr_memory memory;
28000 int dep_insn_code_number;
28002 /* Anti and output dependencies have zero cost on all CPUs. */
28003 if (REG_NOTE_KIND (link) != 0)
28006 dep_insn_code_number = recog_memoized (dep_insn);
28008 /* If we can't recognize the insns, we can't really do anything. */
28009 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
28012 insn_type = get_attr_type (insn);
28013 dep_insn_type = get_attr_type (dep_insn);
28017 case PROCESSOR_PENTIUM:
28018 case PROCESSOR_LAKEMONT:
28019 /* Address Generation Interlock adds a cycle of latency. */
28020 if (insn_type == TYPE_LEA)
28022 rtx addr = PATTERN (insn);
28024 if (GET_CODE (addr) == PARALLEL)
28025 addr = XVECEXP (addr, 0, 0);
28027 gcc_assert (GET_CODE (addr) == SET);
28029 addr = SET_SRC (addr);
28030 if (modified_in_p (addr, dep_insn))
28033 else if (ix86_agi_dependent (dep_insn, insn))
28036 /* ??? Compares pair with jump/setcc. */
28037 if (ix86_flags_dependent (insn, dep_insn, insn_type))
28040 /* Floating point stores require value to be ready one cycle earlier. */
28041 if (insn_type == TYPE_FMOV
28042 && get_attr_memory (insn) == MEMORY_STORE
28043 && !ix86_agi_dependent (dep_insn, insn))
28047 case PROCESSOR_PENTIUMPRO:
28048 /* INT->FP conversion is expensive. */
28049 if (get_attr_fp_int_src (dep_insn))
28052 /* There is one cycle extra latency between an FP op and a store. */
28053 if (insn_type == TYPE_FMOV
28054 && (set = single_set (dep_insn)) != NULL_RTX
28055 && (set2 = single_set (insn)) != NULL_RTX
28056 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
28057 && MEM_P (SET_DEST (set2)))
28060 memory = get_attr_memory (insn);
28062 /* Show ability of reorder buffer to hide latency of load by executing
28063 in parallel with previous instruction in case
28064 previous instruction is not needed to compute the address. */
28065 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28066 && !ix86_agi_dependent (dep_insn, insn))
28068 /* Claim moves to take one cycle, as core can issue one load
28069 at time and the next load can start cycle later. */
28070 if (dep_insn_type == TYPE_IMOV
28071 || dep_insn_type == TYPE_FMOV)
28079 /* The esp dependency is resolved before
28080 the instruction is really finished. */
28081 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
28082 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
28085 /* INT->FP conversion is expensive. */
28086 if (get_attr_fp_int_src (dep_insn))
28089 memory = get_attr_memory (insn);
28091 /* Show ability of reorder buffer to hide latency of load by executing
28092 in parallel with previous instruction in case
28093 previous instruction is not needed to compute the address. */
28094 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28095 && !ix86_agi_dependent (dep_insn, insn))
28097 /* Claim moves to take one cycle, as core can issue one load
28098 at time and the next load can start cycle later. */
28099 if (dep_insn_type == TYPE_IMOV
28100 || dep_insn_type == TYPE_FMOV)
28109 case PROCESSOR_AMDFAM10:
28110 case PROCESSOR_BDVER1:
28111 case PROCESSOR_BDVER2:
28112 case PROCESSOR_BDVER3:
28113 case PROCESSOR_BDVER4:
28114 case PROCESSOR_ZNVER1:
28115 case PROCESSOR_BTVER1:
28116 case PROCESSOR_BTVER2:
28117 case PROCESSOR_GENERIC:
28118 /* Stack engine allows to execute push&pop instructions in parall. */
28119 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
28120 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
28124 case PROCESSOR_ATHLON:
28126 memory = get_attr_memory (insn);
28128 /* Show ability of reorder buffer to hide latency of load by executing
28129 in parallel with previous instruction in case
28130 previous instruction is not needed to compute the address. */
28131 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28132 && !ix86_agi_dependent (dep_insn, insn))
28134 enum attr_unit unit = get_attr_unit (insn);
28137 /* Because of the difference between the length of integer and
28138 floating unit pipeline preparation stages, the memory operands
28139 for floating point are cheaper.
28141 ??? For Athlon it the difference is most probably 2. */
28142 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
28145 loadcost = TARGET_ATHLON ? 2 : 0;
28147 if (cost >= loadcost)
28154 case PROCESSOR_CORE2:
28155 case PROCESSOR_NEHALEM:
28156 case PROCESSOR_SANDYBRIDGE:
28157 case PROCESSOR_HASWELL:
28158 /* Stack engine allows to execute push&pop instructions in parall. */
28159 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
28160 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
28163 memory = get_attr_memory (insn);
28165 /* Show ability of reorder buffer to hide latency of load by executing
28166 in parallel with previous instruction in case
28167 previous instruction is not needed to compute the address. */
28168 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28169 && !ix86_agi_dependent (dep_insn, insn))
28178 case PROCESSOR_SILVERMONT:
28179 case PROCESSOR_KNL:
28180 case PROCESSOR_INTEL:
28181 if (!reload_completed)
28184 /* Increase cost of integer loads. */
28185 memory = get_attr_memory (dep_insn);
28186 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28188 enum attr_unit unit = get_attr_unit (dep_insn);
28189 if (unit == UNIT_INTEGER && cost == 1)
28191 if (memory == MEMORY_LOAD)
28195 /* Increase cost of ld/st for short int types only
28196 because of store forwarding issue. */
28197 rtx set = single_set (dep_insn);
28198 if (set && (GET_MODE (SET_DEST (set)) == QImode
28199 || GET_MODE (SET_DEST (set)) == HImode))
28201 /* Increase cost of store/load insn if exact
28202 dependence exists and it is load insn. */
28203 enum attr_memory insn_memory = get_attr_memory (insn);
28204 if (insn_memory == MEMORY_LOAD
28205 && exact_store_load_dependency (dep_insn, insn))
28219 /* How many alternative schedules to try. This should be as wide as the
28220 scheduling freedom in the DFA, but no wider. Making this value too
28221 large results extra work for the scheduler. */
/* NOTE(review): elided listing; the literal lookahead values returned for
   each case group are on missing lines.  */
28224 ia32_multipass_dfa_lookahead (void)
28228 case PROCESSOR_PENTIUM:
28229 case PROCESSOR_LAKEMONT:
28232 case PROCESSOR_PENTIUMPRO:
28236 case PROCESSOR_BDVER1:
28237 case PROCESSOR_BDVER2:
28238 case PROCESSOR_BDVER3:
28239 case PROCESSOR_BDVER4:
28240 /* We use lookahead value 4 for BD both before and after reload
28241 schedules. Plan is to have value 8 included for O3. */
28244 case PROCESSOR_CORE2:
28245 case PROCESSOR_NEHALEM:
28246 case PROCESSOR_SANDYBRIDGE:
28247 case PROCESSOR_HASWELL:
28248 case PROCESSOR_BONNELL:
28249 case PROCESSOR_SILVERMONT:
28250 case PROCESSOR_KNL:
28251 case PROCESSOR_INTEL:
28252 /* Generally, we want haifa-sched:max_issue() to look ahead as far
28253 as many instructions can be executed on a cycle, i.e.,
28254 issue_rate. I wonder why tuning for many CPUs does not do this. */
28255 if (reload_completed)
28256 return ix86_issue_rate ();
28257 /* Don't use lookahead for pre-reload schedule to save compile time. */
28265 /* Return true if target platform supports macro-fusion. */
/* Thin predicate over the TARGET_FUSE_CMP_AND_BRANCH tuning flag.  */
28268 ix86_macro_fusion_p ()
28270 return TARGET_FUSE_CMP_AND_BRANCH;
28273 /* Check whether current microarchitecture support macro fusion
28274 for insn pair "CONDGEN + CONDJMP". Refer to
28275 "Intel Architectures Optimization Reference Manual". */
/* NOTE(review): elided listing; several early 'return false' statements
   between the numbered lines are not visible.  */
28278 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
28281 enum rtx_code ccode;
28282 rtx compare_set = NULL_RTX, test_if, cond;
28283 rtx alu_set = NULL_RTX, addr = NULL_RTX;
28285 if (!any_condjump_p (condjmp))
/* Only test/cmp/inc-dec/alu flag producers are fusion candidates.  */
28288 if (get_attr_type (condgen) != TYPE_TEST
28289 && get_attr_type (condgen) != TYPE_ICMP
28290 && get_attr_type (condgen) != TYPE_INCDEC
28291 && get_attr_type (condgen) != TYPE_ALU)
28294 compare_set = single_set (condgen);
28295 if (compare_set == NULL_RTX
28296 && !TARGET_FUSE_ALU_AND_BRANCH)
/* No single_set: dig the COMPARE and the ALU op out of a PARALLEL.  */
28299 if (compare_set == NULL_RTX)
28302 rtx pat = PATTERN (condgen);
28303 for (i = 0; i < XVECLEN (pat, 0); i++)
28304 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
28306 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
28307 if (GET_CODE (set_src) == COMPARE)
28308 compare_set = XVECEXP (pat, 0, i);
28310 alu_set = XVECEXP (pat, 0, i);
28313 if (compare_set == NULL_RTX)
28315 src = SET_SRC (compare_set);
28316 if (GET_CODE (src) != COMPARE)
28319 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
28321 if ((MEM_P (XEXP (src, 0))
28322 && CONST_INT_P (XEXP (src, 1)))
28323 || (MEM_P (XEXP (src, 1))
28324 && CONST_INT_P (XEXP (src, 0))))
28327 /* No fusion for RIP-relative address. */
28328 if (MEM_P (XEXP (src, 0)))
28329 addr = XEXP (XEXP (src, 0), 0);
28330 else if (MEM_P (XEXP (src, 1)))
28331 addr = XEXP (XEXP (src, 1), 0);
28334 ix86_address parts;
28335 int ok = ix86_decompose_address (addr, &parts);
28338 if (rip_relative_addr_p (&parts))
28342 test_if = SET_SRC (pc_set (condjmp));
28343 cond = XEXP (test_if, 0);
28344 ccode = GET_CODE (cond);
28345 /* Check whether conditional jump use Sign or Overflow Flags. */
28346 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
28353 /* Return true for TYPE_TEST and TYPE_ICMP. */
28354 if (get_attr_type (condgen) == TYPE_TEST
28355 || get_attr_type (condgen) == TYPE_ICMP)
28358 /* The following is the case that macro-fusion for alu + jmp. */
28359 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
28362 /* No fusion for alu op with memory destination operand. */
28363 dest = SET_DEST (alu_set);
28367 /* Macro-fusion for inc/dec + unsigned conditional jump is not
28369 if (get_attr_type (condgen) == TYPE_INCDEC
28379 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
28380 execution. It is applied if
28381 (1) IMUL instruction is on the top of list;
28382 (2) There exists the only producer of independent IMUL instruction in
28384 Return index of IMUL producer if it was found and -1 otherwise. */
28386 do_reorder_for_imul (rtx_insn **ready, int n_ready)
28389 rtx set, insn1, insn2;
28390 sd_iterator_def sd_it;
/* Bonnell (Atom) only.  */
28395 if (!TARGET_BONNELL)
28398 /* Check that IMUL instruction is on the top of ready list. */
28399 insn = ready[n_ready - 1];
28400 set = single_set (insn);
/* Top insn must be an SImode MULT to qualify.  */
28403 if (!(GET_CODE (SET_SRC (set)) == MULT
28404 && GET_MODE (SET_SRC (set)) == SImode))
28407 /* Search for producer of independent IMUL instruction. */
28408 for (i = n_ready - 2; i >= 0; i--)
28411 if (!NONDEBUG_INSN_P (insn))
28413 /* Skip IMUL instruction. */
28414 insn2 = PATTERN (insn);
28415 if (GET_CODE (insn2) == PARALLEL)
28416 insn2 = XVECEXP (insn2, 0, 0);
28417 if (GET_CODE (insn2) == SET
28418 && GET_CODE (SET_SRC (insn2)) == MULT
28419 && GET_MODE (SET_SRC (insn2)) == SImode)
/* Look at this candidate's forward dependencies for a consumer IMUL.  */
28422 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
28425 con = DEP_CON (dep);
28426 if (!NONDEBUG_INSN_P (con))
28428 insn1 = PATTERN (con);
28429 if (GET_CODE (insn1) == PARALLEL)
28430 insn1 = XVECEXP (insn1, 0, 0);
28432 if (GET_CODE (insn1) == SET
28433 && GET_CODE (SET_SRC (insn1)) == MULT
28434 && GET_MODE (SET_SRC (insn1)) == SImode)
28436 sd_iterator_def sd_it1;
28438 /* Check if there is no other dependee for IMUL. */
28440 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
28443 pro = DEP_PRO (dep1);
28444 if (!NONDEBUG_INSN_P (pro))
28459 /* Try to find the best candidate on the top of ready list if two insns
28460 have the same priority - candidate is best if its dependees were
28461 scheduled earlier. Applied for Silvermont only.
28462 Return true if top 2 insns must be interchanged. */
28464 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
28466 rtx_insn *top = ready[n_ready - 1];
28467 rtx_insn *next = ready[n_ready - 2];
28469 sd_iterator_def sd_it;
28473 #define INSN_TICK(INSN) (HID (INSN)->tick)
28475 if (!TARGET_SILVERMONT && !TARGET_INTEL)
/* Both candidates must be ordinary (non-debug, non-jump) single sets.  */
28478 if (!NONDEBUG_INSN_P (top))
28480 if (!NONJUMP_INSN_P (top))
28482 if (!NONDEBUG_INSN_P (next))
28484 if (!NONJUMP_INSN_P (next))
28486 set = single_set (top)
28489 set = single_set (next)
28493 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
28495 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
28497 /* Determine winner more precise. */
/* clock1/clock2 accumulate the latest tick among each insn's resolved
   producers; the insn whose inputs were ready earlier wins.  */
28498 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
28501 pro = DEP_PRO (dep);
28502 if (!NONDEBUG_INSN_P (pro))
28504 if (INSN_TICK (pro) > clock1)
28505 clock1 = INSN_TICK (pro);
28507 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
28510 pro = DEP_PRO (dep);
28511 if (!NONDEBUG_INSN_P (pro))
28513 if (INSN_TICK (pro) > clock2)
28514 clock2 = INSN_TICK (pro);
28517 if (clock1 == clock2)
28519 /* Determine winner - load must win. */
28520 enum attr_memory memory1, memory2;
28521 memory1 = get_attr_memory (top);
28522 memory2 = get_attr_memory (next);
28523 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
28526 return (bool) (clock2 < clock1);
28532 /* Perform possible reodering of ready list for Atom/Silvermont only.
28533 Return issue rate. */
/* TARGET_SCHED_REORDER hook body (elided listing).  Runs only for the
   post-reload scheduler on Bonnell/Silvermont/Intel tuning.  */
28535 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
28536 int *pn_ready, int clock_var)
28538 int issue_rate = -1;
28539 int n_ready = *pn_ready;
28544 /* Set up issue rate. */
28545 issue_rate = ix86_issue_rate ();
28547 /* Do reodering for BONNELL/SILVERMONT only. */
28548 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
28551 /* Nothing to do if ready list contains only 1 instruction. */
28555 /* Do reodering for post-reload scheduler only. */
28556 if (!reload_completed)
28559 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
28561 if (sched_verbose > 1)
28562 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
28563 INSN_UID (ready[index]));
28565 /* Put IMUL producer (ready[index]) at the top of ready list. */
28566 insn = ready[index];
28567 for (i = index; i < n_ready - 1; i++)
28568 ready[i] = ready[i + 1];
28569 ready[n_ready - 1] = insn;
28573 /* Skip selective scheduling since HID is not populated in it. */
28576 && swap_top_of_ready_list (ready, n_ready))
28578 if (sched_verbose > 1)
28579 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
28580 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
28581 /* Swap 2 top elements of ready list. */
28582 insn = ready[n_ready - 1];
28583 ready[n_ready - 1] = ready[n_ready - 2];
28584 ready[n_ready - 2] = insn;
/* Forward declaration used by the argument-motion helpers below.  */
28590 ix86_class_likely_spilled_p (reg_class_t);
28592 /* Returns true if lhs of insn is HW function argument register and set up
28593 is_spilled to true if it is likely spilled HW register. */
28595 insn_is_function_arg (rtx insn, bool* is_spilled)
28599 if (!NONDEBUG_INSN_P (insn))
28601 /* Call instructions are not movable, ignore it. */
/* Strip to the first SET of a possible PARALLEL before inspecting
   the destination.  */
28604 insn = PATTERN (insn);
28605 if (GET_CODE (insn) == PARALLEL)
28606 insn = XVECEXP (insn, 0, 0);
28607 if (GET_CODE (insn) != SET)
28609 dst = SET_DEST (insn);
28610 if (REG_P (dst) && HARD_REGISTER_P (dst)
28611 && ix86_function_arg_regno_p (REGNO (dst)))
28613 /* Is it likely spilled HW register? */
28614 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
28615 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
28616 *is_spilled = true;
28622 /* Add output dependencies for chain of function adjacent arguments if only
28623 there is a move to likely spilled HW register. Return first argument
28624 if at least one dependence was added or NULL otherwise. */
28626 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
28629 rtx_insn *last = call;
28630 rtx_insn *first_arg = NULL;
28631 bool is_spilled = false;
28633 head = PREV_INSN (head);
28635 /* Find nearest to call argument passing instruction. */
28638 last = PREV_INSN (last);
28641 if (!NONDEBUG_INSN_P (last))
28643 if (insn_is_function_arg (last, &is_spilled))
/* Walk backwards from that argument, chaining earlier argument moves.  */
28651 insn = PREV_INSN (last);
28652 if (!INSN_P (insn))
28656 if (!NONDEBUG_INSN_P (insn))
28661 if (insn_is_function_arg (insn, &is_spilled))
28663 /* Add output depdendence between two function arguments if chain
28664 of output arguments contains likely spilled HW registers. */
28666 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
28667 first_arg = last = insn;
28677 /* Add output or anti dependency from insn to first_arg to restrict its code
28680 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
28685 /* Add anti dependencies for bounds stores. */
/* MPX BNDSTX insns are matched by their UNSPEC wrapper.  */
28687 && GET_CODE (PATTERN (insn)) == PARALLEL
28688 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
28689 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
28691 add_dependence (first_arg, insn, REG_DEP_ANTI);
28695 set = single_set (insn);
28698 tmp = SET_DEST (set);
28701 /* Add output dependency to the first function argument. */
28702 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
28705 /* Add anti dependency. */
28706 add_dependence (first_arg, insn, REG_DEP_ANTI);
28709 /* Avoid cross block motion of function argument through adding dependency
28710 from the first non-jump instruction in bb. */
/* Scans BB backwards from BB_END to its head, tying ARG to the first
   suitable single-set insn found.  */
28712 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
28714 rtx_insn *insn = BB_END (bb);
28718 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
28720 rtx set = single_set (insn);
28723 avoid_func_arg_motion (arg, insn);
28727 if (insn == BB_HEAD (bb))
28729 insn = PREV_INSN (insn);
28733 /* Hook for pre-reload schedule - avoid motion of function arguments
28734 passed in likely spilled HW registers. */
28736 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
28739 rtx_insn *first_arg = NULL;
/* Pre-reload only; the argument-register problem disappears afterwards.  */
28740 if (reload_completed)
28742 while (head != tail && DEBUG_INSN_P (head))
28743 head = NEXT_INSN (head);
28744 for (insn = tail; insn != head; insn = PREV_INSN (insn))
28745 if (INSN_P (insn) && CALL_P (insn))
28747 first_arg = add_parameter_dependencies (insn, head);
28750 /* Add dependee for first argument to predecessors if only
28751 region contains more than one block. */
28752 basic_block bb = BLOCK_FOR_INSN (insn);
28753 int rgn = CONTAINING_RGN (bb->index);
28754 int nr_blks = RGN_NR_BLOCKS (rgn);
28755 /* Skip trivial regions and region head blocks that can have
28756 predecessors outside of region. */
28757 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
28762 /* Regions are SCCs with the exception of selective
28763 scheduling with pipelining of outer blocks enabled.
28764 So also check that immediate predecessors of a non-head
28765 block are in the same region. */
28766 FOR_EACH_EDGE (e, ei, bb->preds)
28768 /* Avoid creating of loop-carried dependencies through
28769 using topological ordering in the region. */
28770 if (rgn == CONTAINING_RGN (e->src->index)
28771 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
28772 add_dependee_for_func_arg (first_arg, e->src);
28780 else if (first_arg)
28781 avoid_func_arg_motion (first_arg, insn);
28784 /* Hook for pre-reload schedule - set priority of moves from likely spilled
28785 HW registers to maximum, to schedule them at soon as possible. These are
28786 moves from function argument registers at the top of the function entry
28787 and moves from function return value registers after call. */
28789 ix86_adjust_priority (rtx_insn *insn, int priority)
/* Pre-reload only; otherwise the incoming priority is kept.  */
28793 if (reload_completed)
28796 if (!NONDEBUG_INSN_P (insn))
28799 set = single_set (insn);
28802 rtx tmp = SET_SRC (set);
28804 && HARD_REGISTER_P (tmp)
28805 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
28806 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
28807 return current_sched_info->sched_max_insns_priority;
28813 /* Model decoder of Core 2/i7.
28814 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
28815 track the instruction fetch block boundaries and make sure that long
28816 (9+ bytes) instructions are assigned to D0. */
28818 /* Maximum length of an insn that can be handled by
28819 a secondary decoder unit. '8' for Core 2/i7. */
28820 static int core2i7_secondary_decoder_max_insn_size;
28822 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
28823 '16' for Core 2/i7. */
28824 static int core2i7_ifetch_block_size;
28826 /* Maximum number of instructions decoder can handle per cycle.
28827 '6' for Core 2/i7. */
28828 static int core2i7_ifetch_block_max_insns;
/* Mutable/const pointer typedefs for the per-round multipass state.  */
28830 typedef struct ix86_first_cycle_multipass_data_ *
28831 ix86_first_cycle_multipass_data_t;
28832 typedef const struct ix86_first_cycle_multipass_data_ *
28833 const_ix86_first_cycle_multipass_data_t;
28835 /* A variable to store target state across calls to max_issue within
28837 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
28838 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
28840 /* Initialize DATA. */
/* Resets ifetch counters and the (lazily allocated) ready_try bitmap.  */
28842 core2i7_first_cycle_multipass_init (void *_data)
28844 ix86_first_cycle_multipass_data_t data
28845 = (ix86_first_cycle_multipass_data_t) _data;
28847 data->ifetch_block_len = 0;
28848 data->ifetch_block_n_insns = 0;
28849 data->ready_try_change = NULL;
28850 data->ready_try_change_size = 0;
28853 /* Advancing the cycle; reset ifetch block counts. */
28855 core2i7_dfa_post_advance_cycle (void)
28857 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
/* Sanity: the previous cycle may not have issued more insns than the
   decoder can handle.  */
28859 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
28861 data->ifetch_block_len = 0;
28862 data->ifetch_block_n_insns = 0;
28865 static int min_insn_size (rtx_insn *);
28867 /* Filter out insns from ready_try that the core will not be able to issue
28868 on current cycle due to decoder. */
/* NOTE(review): elided listing -- the loop over n_ready that drives these
   statements is on missing lines.  */
28870 core2i7_first_cycle_multipass_filter_ready_try
28871 (const_ix86_first_cycle_multipass_data_t data,
28872 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
28879 if (ready_try[n_ready])
28882 insn = get_ready_element (n_ready);
28883 insn_size = min_insn_size (insn);
28885 if (/* If this is a too long an insn for a secondary decoder ... */
28886 (!first_cycle_insn_p
28887 && insn_size > core2i7_secondary_decoder_max_insn_size)
28888 /* ... or it would not fit into the ifetch block ... */
28889 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
28890 /* ... or the decoder is full already ... */
28891 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
28892 /* ... mask the insn out. */
28894 ready_try[n_ready] = 1;
/* Record the change so backtracking can undo it.  */
28896 if (data->ready_try_change)
28897 bitmap_set_bit (data->ready_try_change, n_ready);
28902 /* Prepare for a new round of multipass lookahead scheduling. */
28904 core2i7_first_cycle_multipass_begin (void *_data,
28905 signed char *ready_try, int n_ready,
28906 bool first_cycle_insn_p)
28908 ix86_first_cycle_multipass_data_t data
28909 = (ix86_first_cycle_multipass_data_t) _data;
28910 const_ix86_first_cycle_multipass_data_t prev_data
28911 = ix86_first_cycle_multipass_data;
28913 /* Restore the state from the end of the previous round. */
28914 data->ifetch_block_len = prev_data->ifetch_block_len;
28915 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
28917 /* Filter instructions that cannot be issued on current cycle due to
28918 decoder restrictions. */
28919 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
28920 first_cycle_insn_p);
28923 /* INSN is being issued in current solution. Account for its impact on
28924 the decoder model. */
28926 core2i7_first_cycle_multipass_issue (void *_data,
28927 signed char *ready_try, int n_ready,
28928 rtx_insn *insn, const void *_prev_data)
28930 ix86_first_cycle_multipass_data_t data
28931 = (ix86_first_cycle_multipass_data_t) _data;
28932 const_ix86_first_cycle_multipass_data_t prev_data
28933 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
28935 int insn_size = min_insn_size (insn);
/* Charge INSN's bytes and one decoder slot against the ifetch block.  */
28937 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
28938 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
28939 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
28940 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
28942 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
28943 if (!data->ready_try_change)
28945 data->ready_try_change = sbitmap_alloc (n_ready);
28946 data->ready_try_change_size = n_ready;
28948 else if (data->ready_try_change_size < n_ready)
28950 data->ready_try_change = sbitmap_resize (data->ready_try_change,
28952 data->ready_try_change_size = n_ready;
28954 bitmap_clear (data->ready_try_change);
28956 /* Filter out insns from ready_try that the core will not be able to issue
28957 on current cycle due to decoder. */
28958 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
28962 /* Revert the effect on ready_try. */
/* Clears every ready_try slot recorded in the change bitmap (the
   clearing statement itself is on an elided line).  */
28964 core2i7_first_cycle_multipass_backtrack (const void *_data,
28965 signed char *ready_try,
28966 int n_ready ATTRIBUTE_UNUSED)
28968 const_ix86_first_cycle_multipass_data_t data
28969 = (const_ix86_first_cycle_multipass_data_t) _data;
28970 unsigned int i = 0;
28971 sbitmap_iterator sbi;
28973 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
28974 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
28980 /* Save the result of multipass lookahead scheduling for the next round. */
28982 core2i7_first_cycle_multipass_end (const void *_data)
28984 const_ix86_first_cycle_multipass_data_t data
28985 = (const_ix86_first_cycle_multipass_data_t) _data;
28986 ix86_first_cycle_multipass_data_t next_data
28987 = ix86_first_cycle_multipass_data;
/* Copy the ifetch counters into the shared cross-round state.  */
28991 next_data->ifetch_block_len = data->ifetch_block_len;
28992 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
28996 /* Deallocate target data. */
/* Frees the ready_try change bitmap and resets its bookkeeping.  */
28998 core2i7_first_cycle_multipass_fini (void *_data)
29000 ix86_first_cycle_multipass_data_t data
29001 = (ix86_first_cycle_multipass_data_t) _data;
29003 if (data->ready_try_change)
29005 sbitmap_free (data->ready_try_change);
29006 data->ready_try_change = NULL;
29007 data->ready_try_change_size = 0;
29011 /* Prepare for scheduling pass. */
29013 ix86_sched_init_global (FILE *, int, int)
29015 /* Install scheduling hooks for current CPU. Some of these hooks are used
29016 in time-critical parts of the scheduler, so we only set them up when
29017 they are actually used. */
29020 case PROCESSOR_CORE2:
29021 case PROCESSOR_NEHALEM:
29022 case PROCESSOR_SANDYBRIDGE:
29023 case PROCESSOR_HASWELL:
29024 /* Do not perform multipass scheduling for pre-reload schedule
29025 to save compile time. */
29026 if (reload_completed)
29028 targetm.sched.dfa_post_advance_cycle
29029 = core2i7_dfa_post_advance_cycle;
29030 targetm.sched.first_cycle_multipass_init
29031 = core2i7_first_cycle_multipass_init;
29032 targetm.sched.first_cycle_multipass_begin
29033 = core2i7_first_cycle_multipass_begin;
29034 targetm.sched.first_cycle_multipass_issue
29035 = core2i7_first_cycle_multipass_issue;
29036 targetm.sched.first_cycle_multipass_backtrack
29037 = core2i7_first_cycle_multipass_backtrack;
29038 targetm.sched.first_cycle_multipass_end
29039 = core2i7_first_cycle_multipass_end;
29040 targetm.sched.first_cycle_multipass_fini
29041 = core2i7_first_cycle_multipass_fini;
29043 /* Set decoder parameters. */
29044 core2i7_secondary_decoder_max_insn_size = 8;
29045 core2i7_ifetch_block_size = 16;
29046 core2i7_ifetch_block_max_insns = 6;
29049 /* ... Fall through ... */
/* Default: clear all multipass hooks so the scheduler skips them.  */
29051 targetm.sched.dfa_post_advance_cycle = NULL;
29052 targetm.sched.first_cycle_multipass_init = NULL;
29053 targetm.sched.first_cycle_multipass_begin = NULL;
29054 targetm.sched.first_cycle_multipass_issue = NULL;
29055 targetm.sched.first_cycle_multipass_backtrack = NULL;
29056 targetm.sched.first_cycle_multipass_end = NULL;
29057 targetm.sched.first_cycle_multipass_fini = NULL;
29063 /* Compute the alignment given to a constant that is being placed in memory.
29064 EXP is the constant and ALIGN is the alignment that the object would
29066 The value of this function is used instead of that alignment to align
/* Bumps DFmode constants to 64 bits, 128-bit-mode constants to 128 bits,
   and long string constants to word alignment when not optimizing for
   size; otherwise the incoming ALIGN is kept.  */
29070 ix86_constant_alignment (tree exp, int align)
29072 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
29073 || TREE_CODE (exp) == INTEGER_CST)
29075 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
29077 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
29080 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
29081 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
29082 return BITS_PER_WORD;
29087 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
29088 the data type, and ALIGN is the alignment that the object would
29089 ordinarily have. */
29092 iamcu_alignment (tree type, int align)
29094 enum machine_mode mode;
/* Alignments below 32 bits and user-specified alignments are left alone.  */
29096 if (align < 32 || TYPE_USER_ALIGN (type))
29099 /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
29101 mode = TYPE_MODE (strip_array_types (type));
29102 switch (GET_MODE_CLASS (mode))
29105 case MODE_COMPLEX_INT:
29106 case MODE_COMPLEX_FLOAT:
29108 case MODE_DECIMAL_FLOAT:
29115 /* Compute the alignment for a static variable.
29116 TYPE is the data type, and ALIGN is the alignment that
29117 the object would ordinarily have. The value of this function is used
29118 instead of that alignment to align the object. */
29121 ix86_data_alignment (tree type, int align, bool opt)
29123 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
29124 for symbols from other compilation units or symbols that don't need
29125 to bind locally. In order to preserve some ABI compatibility with
29126 those compilers, ensure we don't decrease alignment from what we
29129 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
29131 /* A data structure, equal or greater than the size of a cache line
29132 (64 bytes in the Pentium 4 and other recent Intel processors, including
29133 processors based on Intel Core microarchitecture) should be aligned
29134 so that its base address is a multiple of a cache line size. */
29137 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
29139 if (max_align < BITS_PER_WORD)
29140 max_align = BITS_PER_WORD;
29142 switch (ix86_align_data_type)
29144 case ix86_align_data_type_abi: opt = false; break;
29145 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
29146 case ix86_align_data_type_cacheline: break;
29150 align = iamcu_alignment (type, align);
29153 && AGGREGATE_TYPE_P (type)
29154 && TYPE_SIZE (type)
29155 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
29157 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
29158 && align < max_align_compat)
29159 align = max_align_compat;
29160 if (wi::geu_p (TYPE_SIZE (type), max_align)
29161 && align < max_align)
29165 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
29166 to 16byte boundary. */
29169 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
29170 && TYPE_SIZE (type)
29171 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
29172 && wi::geu_p (TYPE_SIZE (type), 128)
29180 if (TREE_CODE (type) == ARRAY_TYPE)
29182 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
29184 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
29187 else if (TREE_CODE (type) == COMPLEX_TYPE)
29190 if (TYPE_MODE (type) == DCmode && align < 64)
29192 if ((TYPE_MODE (type) == XCmode
29193 || TYPE_MODE (type) == TCmode) && align < 128)
29196 else if ((TREE_CODE (type) == RECORD_TYPE
29197 || TREE_CODE (type) == UNION_TYPE
29198 || TREE_CODE (type) == QUAL_UNION_TYPE)
29199 && TYPE_FIELDS (type))
29201 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
29203 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
29206 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
29207 || TREE_CODE (type) == INTEGER_TYPE)
29209 if (TYPE_MODE (type) == DFmode && align < 64)
29211 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
29218 /* Compute the alignment for a local variable or a stack slot. EXP is
29219 the data type or decl itself, MODE is the widest mode available and
29220 ALIGN is the alignment that the object would ordinarily have. The
29221 value of this macro is used instead of that alignment to align the
29225 ix86_local_alignment (tree exp, machine_mode mode,
29226 unsigned int align)
29230 if (exp && DECL_P (exp))
29232 type = TREE_TYPE (exp);
29241 /* Don't do dynamic stack realignment for long long objects with
29242 -mpreferred-stack-boundary=2. */
29245 && ix86_preferred_stack_boundary < 64
29246 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
29247 && (!type || !TYPE_USER_ALIGN (type))
29248 && (!decl || !DECL_USER_ALIGN (decl)))
29251 /* If TYPE is NULL, we are allocating a stack slot for caller-save
29252 register in MODE. We will return the largest alignment of XF
29256 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
29257 align = GET_MODE_ALIGNMENT (DFmode);
29261 /* Don't increase alignment for Intel MCU psABI. */
29265 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
29266 to 16byte boundary. Exact wording is:
29268 An array uses the same alignment as its elements, except that a local or
29269 global array variable of length at least 16 bytes or
29270 a C99 variable-length array variable always has alignment of at least 16 bytes.
29272 This was added to allow use of aligned SSE instructions at arrays. This
29273 rule is meant for static storage (where compiler can not do the analysis
29274 by itself). We follow it for automatic variables only when convenient.
29275 We fully control everything in the function compiled and functions from
29276 other unit can not rely on the alignment.
29278 Exclude va_list type. It is the common case of local array where
29279 we can not benefit from the alignment.
29281 TODO: Probably one should optimize for size only when var is not escaping. */
29282 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
29285 if (AGGREGATE_TYPE_P (type)
29286 && (va_list_type_node == NULL_TREE
29287 || (TYPE_MAIN_VARIANT (type)
29288 != TYPE_MAIN_VARIANT (va_list_type_node)))
29289 && TYPE_SIZE (type)
29290 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
29291 && wi::geu_p (TYPE_SIZE (type), 16)
29295 if (TREE_CODE (type) == ARRAY_TYPE)
29297 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
29299 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
29302 else if (TREE_CODE (type) == COMPLEX_TYPE)
29304 if (TYPE_MODE (type) == DCmode && align < 64)
29306 if ((TYPE_MODE (type) == XCmode
29307 || TYPE_MODE (type) == TCmode) && align < 128)
29310 else if ((TREE_CODE (type) == RECORD_TYPE
29311 || TREE_CODE (type) == UNION_TYPE
29312 || TREE_CODE (type) == QUAL_UNION_TYPE)
29313 && TYPE_FIELDS (type))
29315 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
29317 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
29320 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
29321 || TREE_CODE (type) == INTEGER_TYPE)
29324 if (TYPE_MODE (type) == DFmode && align < 64)
29326 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
29332 /* Compute the minimum required alignment for dynamic stack realignment
29333 purposes for a local variable, parameter or a stack slot. EXP is
29334 the data type or decl itself, MODE is its mode and ALIGN is the
29335 alignment that the object would ordinarily have. */
29338 ix86_minimum_alignment (tree exp, machine_mode mode,
29339 unsigned int align)
29343 if (exp && DECL_P (exp))
29345 type = TREE_TYPE (exp);
29354 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
29357 /* Don't do dynamic stack realignment for long long objects with
29358 -mpreferred-stack-boundary=2. */
29359 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
29360 && (!type || !TYPE_USER_ALIGN (type))
29361 && (!decl || !DECL_USER_ALIGN (decl)))
29363 gcc_checking_assert (!TARGET_STV);
29370 /* Find a location for the static chain incoming to a nested function.
29371 This is a register, unless all free registers are used by arguments. */
29374 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
29378 /* While this function won't be called by the middle-end when a static
29379 chain isn't needed, it's also used throughout the backend so it's
29380 easiest to keep this check centralized. */
29381 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
29386 /* We always use R10 in 64-bit mode. */
29391 const_tree fntype, fndecl;
29394 /* By default in 32-bit mode we use ECX to pass the static chain. */
29397 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
29399 fntype = TREE_TYPE (fndecl_or_type);
29400 fndecl = fndecl_or_type;
29404 fntype = fndecl_or_type;
29408 ccvt = ix86_get_callcvt (fntype);
29409 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
29411 /* Fastcall functions use ecx/edx for arguments, which leaves
29412 us with EAX for the static chain.
29413 Thiscall functions use ecx for arguments, which also
29414 leaves us with EAX for the static chain. */
29417 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
29419 /* Thiscall functions use ecx for arguments, which leaves
29420 us with EAX and EDX for the static chain.
29421 We are using for abi-compatibility EAX. */
29424 else if (ix86_function_regparm (fntype, fndecl) == 3)
29426 /* For regparm 3, we have no free call-clobbered registers in
29427 which to store the static chain. In order to implement this,
29428 we have the trampoline push the static chain to the stack.
29429 However, we can't push a value below the return address when
29430 we call the nested function directly, so we have to use an
29431 alternate entry point. For this we use ESI, and have the
29432 alternate entry point push ESI, so that things appear the
29433 same once we're executing the nested function. */
29436 if (fndecl == current_function_decl)
29437 ix86_static_chain_on_stack = true;
29438 return gen_frame_mem (SImode,
29439 plus_constant (Pmode,
29440 arg_pointer_rtx, -8));
29446 return gen_rtx_REG (Pmode, regno);
29449 /* Emit RTL insns to initialize the variable parts of a trampoline.
29450 FNDECL is the decl of the target address; M_TRAMP is a MEM for
29451 the trampoline, and CHAIN_VALUE is an RTX for the static chain
29452 to be passed to the target function. */
29455 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
29461 fnaddr = XEXP (DECL_RTL (fndecl), 0);
29467 /* Load the function address to r11. Try to load address using
29468 the shorter movl instead of movabs. We may want to support
29469 movq for kernel mode, but kernel does not use trampolines at
29470 the moment. FNADDR is a 32bit address and may not be in
29471 DImode when ptr_mode == SImode. Always use movl in this
29473 if (ptr_mode == SImode
29474 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
29476 fnaddr = copy_addr_to_reg (fnaddr);
29478 mem = adjust_address (m_tramp, HImode, offset);
29479 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
29481 mem = adjust_address (m_tramp, SImode, offset + 2);
29482 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
29487 mem = adjust_address (m_tramp, HImode, offset);
29488 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
29490 mem = adjust_address (m_tramp, DImode, offset + 2);
29491 emit_move_insn (mem, fnaddr);
29495 /* Load static chain using movabs to r10. Use the shorter movl
29496 instead of movabs when ptr_mode == SImode. */
29497 if (ptr_mode == SImode)
29508 mem = adjust_address (m_tramp, HImode, offset);
29509 emit_move_insn (mem, gen_int_mode (opcode, HImode));
29511 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
29512 emit_move_insn (mem, chain_value);
29515 /* Jump to r11; the last (unused) byte is a nop, only there to
29516 pad the write out to a single 32-bit store. */
29517 mem = adjust_address (m_tramp, SImode, offset);
29518 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
29525 /* Depending on the static chain location, either load a register
29526 with a constant, or push the constant to the stack. All of the
29527 instructions are the same size. */
29528 chain = ix86_static_chain (fndecl, true);
29531 switch (REGNO (chain))
29534 opcode = 0xb8; break;
29536 opcode = 0xb9; break;
29538 gcc_unreachable ();
29544 mem = adjust_address (m_tramp, QImode, offset);
29545 emit_move_insn (mem, gen_int_mode (opcode, QImode));
29547 mem = adjust_address (m_tramp, SImode, offset + 1);
29548 emit_move_insn (mem, chain_value);
29551 mem = adjust_address (m_tramp, QImode, offset);
29552 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
29554 mem = adjust_address (m_tramp, SImode, offset + 1);
29556 /* Compute offset from the end of the jmp to the target function.
29557 In the case in which the trampoline stores the static chain on
29558 the stack, we need to skip the first insn which pushes the
29559 (call-saved) register static chain; this push is 1 byte. */
29561 disp = expand_binop (SImode, sub_optab, fnaddr,
29562 plus_constant (Pmode, XEXP (m_tramp, 0),
29563 offset - (MEM_P (chain) ? 1 : 0)),
29564 NULL_RTX, 1, OPTAB_DIRECT);
29565 emit_move_insn (mem, disp);
29568 gcc_assert (offset <= TRAMPOLINE_SIZE);
29570 #ifdef HAVE_ENABLE_EXECUTE_STACK
29571 #ifdef CHECK_EXECUTE_STACK_ENABLED
29572 if (CHECK_EXECUTE_STACK_ENABLED)
29574 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
29575 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
29579 /* The following file contains several enumerations and data structures
29580 built from the definitions in i386-builtin-types.def. */
29582 #include "i386-builtin-types.inc"
29584 /* Table for the ix86 builtin non-function types. */
29585 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
29587 /* Retrieve an element from the above table, building some of
29588 the types lazily. */
29591 ix86_get_builtin_type (enum ix86_builtin_type tcode)
29593 unsigned int index;
29596 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
29598 type = ix86_builtin_type_tab[(int) tcode];
29602 gcc_assert (tcode > IX86_BT_LAST_PRIM);
29603 if (tcode <= IX86_BT_LAST_VECT)
29607 index = tcode - IX86_BT_LAST_PRIM - 1;
29608 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
29609 mode = ix86_builtin_type_vect_mode[index];
29611 type = build_vector_type_for_mode (itype, mode);
29617 index = tcode - IX86_BT_LAST_VECT - 1;
29618 if (tcode <= IX86_BT_LAST_PTR)
29619 quals = TYPE_UNQUALIFIED;
29621 quals = TYPE_QUAL_CONST;
29623 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
29624 if (quals != TYPE_UNQUALIFIED)
29625 itype = build_qualified_type (itype, quals);
29627 type = build_pointer_type (itype);
29630 ix86_builtin_type_tab[(int) tcode] = type;
29634 /* Table for the ix86 builtin function types. */
29635 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
29637 /* Retrieve an element from the above table, building some of
29638 the types lazily. */
29641 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
29645 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
29647 type = ix86_builtin_func_type_tab[(int) tcode];
29651 if (tcode <= IX86_BT_LAST_FUNC)
29653 unsigned start = ix86_builtin_func_start[(int) tcode];
29654 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
29655 tree rtype, atype, args = void_list_node;
29658 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
29659 for (i = after - 1; i > start; --i)
29661 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
29662 args = tree_cons (NULL, atype, args);
29665 type = build_function_type (rtype, args);
29669 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
29670 enum ix86_builtin_func_type icode;
29672 icode = ix86_builtin_func_alias_base[index];
29673 type = ix86_get_builtin_func_type (icode);
29676 ix86_builtin_func_type_tab[(int) tcode] = type;
29681 /* Codes for all the SSE/MMX builtins. */
29684 IX86_BUILTIN_ADDPS,
29685 IX86_BUILTIN_ADDSS,
29686 IX86_BUILTIN_DIVPS,
29687 IX86_BUILTIN_DIVSS,
29688 IX86_BUILTIN_MULPS,
29689 IX86_BUILTIN_MULSS,
29690 IX86_BUILTIN_SUBPS,
29691 IX86_BUILTIN_SUBSS,
29693 IX86_BUILTIN_CMPEQPS,
29694 IX86_BUILTIN_CMPLTPS,
29695 IX86_BUILTIN_CMPLEPS,
29696 IX86_BUILTIN_CMPGTPS,
29697 IX86_BUILTIN_CMPGEPS,
29698 IX86_BUILTIN_CMPNEQPS,
29699 IX86_BUILTIN_CMPNLTPS,
29700 IX86_BUILTIN_CMPNLEPS,
29701 IX86_BUILTIN_CMPNGTPS,
29702 IX86_BUILTIN_CMPNGEPS,
29703 IX86_BUILTIN_CMPORDPS,
29704 IX86_BUILTIN_CMPUNORDPS,
29705 IX86_BUILTIN_CMPEQSS,
29706 IX86_BUILTIN_CMPLTSS,
29707 IX86_BUILTIN_CMPLESS,
29708 IX86_BUILTIN_CMPNEQSS,
29709 IX86_BUILTIN_CMPNLTSS,
29710 IX86_BUILTIN_CMPNLESS,
29711 IX86_BUILTIN_CMPORDSS,
29712 IX86_BUILTIN_CMPUNORDSS,
29714 IX86_BUILTIN_COMIEQSS,
29715 IX86_BUILTIN_COMILTSS,
29716 IX86_BUILTIN_COMILESS,
29717 IX86_BUILTIN_COMIGTSS,
29718 IX86_BUILTIN_COMIGESS,
29719 IX86_BUILTIN_COMINEQSS,
29720 IX86_BUILTIN_UCOMIEQSS,
29721 IX86_BUILTIN_UCOMILTSS,
29722 IX86_BUILTIN_UCOMILESS,
29723 IX86_BUILTIN_UCOMIGTSS,
29724 IX86_BUILTIN_UCOMIGESS,
29725 IX86_BUILTIN_UCOMINEQSS,
29727 IX86_BUILTIN_CVTPI2PS,
29728 IX86_BUILTIN_CVTPS2PI,
29729 IX86_BUILTIN_CVTSI2SS,
29730 IX86_BUILTIN_CVTSI642SS,
29731 IX86_BUILTIN_CVTSS2SI,
29732 IX86_BUILTIN_CVTSS2SI64,
29733 IX86_BUILTIN_CVTTPS2PI,
29734 IX86_BUILTIN_CVTTSS2SI,
29735 IX86_BUILTIN_CVTTSS2SI64,
29737 IX86_BUILTIN_MAXPS,
29738 IX86_BUILTIN_MAXSS,
29739 IX86_BUILTIN_MINPS,
29740 IX86_BUILTIN_MINSS,
29742 IX86_BUILTIN_LOADUPS,
29743 IX86_BUILTIN_STOREUPS,
29744 IX86_BUILTIN_MOVSS,
29746 IX86_BUILTIN_MOVHLPS,
29747 IX86_BUILTIN_MOVLHPS,
29748 IX86_BUILTIN_LOADHPS,
29749 IX86_BUILTIN_LOADLPS,
29750 IX86_BUILTIN_STOREHPS,
29751 IX86_BUILTIN_STORELPS,
29753 IX86_BUILTIN_MASKMOVQ,
29754 IX86_BUILTIN_MOVMSKPS,
29755 IX86_BUILTIN_PMOVMSKB,
29757 IX86_BUILTIN_MOVNTPS,
29758 IX86_BUILTIN_MOVNTQ,
29760 IX86_BUILTIN_LOADDQU,
29761 IX86_BUILTIN_STOREDQU,
29763 IX86_BUILTIN_PACKSSWB,
29764 IX86_BUILTIN_PACKSSDW,
29765 IX86_BUILTIN_PACKUSWB,
29767 IX86_BUILTIN_PADDB,
29768 IX86_BUILTIN_PADDW,
29769 IX86_BUILTIN_PADDD,
29770 IX86_BUILTIN_PADDQ,
29771 IX86_BUILTIN_PADDSB,
29772 IX86_BUILTIN_PADDSW,
29773 IX86_BUILTIN_PADDUSB,
29774 IX86_BUILTIN_PADDUSW,
29775 IX86_BUILTIN_PSUBB,
29776 IX86_BUILTIN_PSUBW,
29777 IX86_BUILTIN_PSUBD,
29778 IX86_BUILTIN_PSUBQ,
29779 IX86_BUILTIN_PSUBSB,
29780 IX86_BUILTIN_PSUBSW,
29781 IX86_BUILTIN_PSUBUSB,
29782 IX86_BUILTIN_PSUBUSW,
29785 IX86_BUILTIN_PANDN,
29789 IX86_BUILTIN_PAVGB,
29790 IX86_BUILTIN_PAVGW,
29792 IX86_BUILTIN_PCMPEQB,
29793 IX86_BUILTIN_PCMPEQW,
29794 IX86_BUILTIN_PCMPEQD,
29795 IX86_BUILTIN_PCMPGTB,
29796 IX86_BUILTIN_PCMPGTW,
29797 IX86_BUILTIN_PCMPGTD,
29799 IX86_BUILTIN_PMADDWD,
29801 IX86_BUILTIN_PMAXSW,
29802 IX86_BUILTIN_PMAXUB,
29803 IX86_BUILTIN_PMINSW,
29804 IX86_BUILTIN_PMINUB,
29806 IX86_BUILTIN_PMULHUW,
29807 IX86_BUILTIN_PMULHW,
29808 IX86_BUILTIN_PMULLW,
29810 IX86_BUILTIN_PSADBW,
29811 IX86_BUILTIN_PSHUFW,
29813 IX86_BUILTIN_PSLLW,
29814 IX86_BUILTIN_PSLLD,
29815 IX86_BUILTIN_PSLLQ,
29816 IX86_BUILTIN_PSRAW,
29817 IX86_BUILTIN_PSRAD,
29818 IX86_BUILTIN_PSRLW,
29819 IX86_BUILTIN_PSRLD,
29820 IX86_BUILTIN_PSRLQ,
29821 IX86_BUILTIN_PSLLWI,
29822 IX86_BUILTIN_PSLLDI,
29823 IX86_BUILTIN_PSLLQI,
29824 IX86_BUILTIN_PSRAWI,
29825 IX86_BUILTIN_PSRADI,
29826 IX86_BUILTIN_PSRLWI,
29827 IX86_BUILTIN_PSRLDI,
29828 IX86_BUILTIN_PSRLQI,
29830 IX86_BUILTIN_PUNPCKHBW,
29831 IX86_BUILTIN_PUNPCKHWD,
29832 IX86_BUILTIN_PUNPCKHDQ,
29833 IX86_BUILTIN_PUNPCKLBW,
29834 IX86_BUILTIN_PUNPCKLWD,
29835 IX86_BUILTIN_PUNPCKLDQ,
29837 IX86_BUILTIN_SHUFPS,
29839 IX86_BUILTIN_RCPPS,
29840 IX86_BUILTIN_RCPSS,
29841 IX86_BUILTIN_RSQRTPS,
29842 IX86_BUILTIN_RSQRTPS_NR,
29843 IX86_BUILTIN_RSQRTSS,
29844 IX86_BUILTIN_RSQRTF,
29845 IX86_BUILTIN_SQRTPS,
29846 IX86_BUILTIN_SQRTPS_NR,
29847 IX86_BUILTIN_SQRTSS,
29849 IX86_BUILTIN_UNPCKHPS,
29850 IX86_BUILTIN_UNPCKLPS,
29852 IX86_BUILTIN_ANDPS,
29853 IX86_BUILTIN_ANDNPS,
29855 IX86_BUILTIN_XORPS,
29858 IX86_BUILTIN_LDMXCSR,
29859 IX86_BUILTIN_STMXCSR,
29860 IX86_BUILTIN_SFENCE,
29862 IX86_BUILTIN_FXSAVE,
29863 IX86_BUILTIN_FXRSTOR,
29864 IX86_BUILTIN_FXSAVE64,
29865 IX86_BUILTIN_FXRSTOR64,
29867 IX86_BUILTIN_XSAVE,
29868 IX86_BUILTIN_XRSTOR,
29869 IX86_BUILTIN_XSAVE64,
29870 IX86_BUILTIN_XRSTOR64,
29872 IX86_BUILTIN_XSAVEOPT,
29873 IX86_BUILTIN_XSAVEOPT64,
29875 IX86_BUILTIN_XSAVEC,
29876 IX86_BUILTIN_XSAVEC64,
29878 IX86_BUILTIN_XSAVES,
29879 IX86_BUILTIN_XRSTORS,
29880 IX86_BUILTIN_XSAVES64,
29881 IX86_BUILTIN_XRSTORS64,
29883 /* 3DNow! Original */
29884 IX86_BUILTIN_FEMMS,
29885 IX86_BUILTIN_PAVGUSB,
29886 IX86_BUILTIN_PF2ID,
29887 IX86_BUILTIN_PFACC,
29888 IX86_BUILTIN_PFADD,
29889 IX86_BUILTIN_PFCMPEQ,
29890 IX86_BUILTIN_PFCMPGE,
29891 IX86_BUILTIN_PFCMPGT,
29892 IX86_BUILTIN_PFMAX,
29893 IX86_BUILTIN_PFMIN,
29894 IX86_BUILTIN_PFMUL,
29895 IX86_BUILTIN_PFRCP,
29896 IX86_BUILTIN_PFRCPIT1,
29897 IX86_BUILTIN_PFRCPIT2,
29898 IX86_BUILTIN_PFRSQIT1,
29899 IX86_BUILTIN_PFRSQRT,
29900 IX86_BUILTIN_PFSUB,
29901 IX86_BUILTIN_PFSUBR,
29902 IX86_BUILTIN_PI2FD,
29903 IX86_BUILTIN_PMULHRW,
29905 /* 3DNow! Athlon Extensions */
29906 IX86_BUILTIN_PF2IW,
29907 IX86_BUILTIN_PFNACC,
29908 IX86_BUILTIN_PFPNACC,
29909 IX86_BUILTIN_PI2FW,
29910 IX86_BUILTIN_PSWAPDSI,
29911 IX86_BUILTIN_PSWAPDSF,
29914 IX86_BUILTIN_ADDPD,
29915 IX86_BUILTIN_ADDSD,
29916 IX86_BUILTIN_DIVPD,
29917 IX86_BUILTIN_DIVSD,
29918 IX86_BUILTIN_MULPD,
29919 IX86_BUILTIN_MULSD,
29920 IX86_BUILTIN_SUBPD,
29921 IX86_BUILTIN_SUBSD,
29923 IX86_BUILTIN_CMPEQPD,
29924 IX86_BUILTIN_CMPLTPD,
29925 IX86_BUILTIN_CMPLEPD,
29926 IX86_BUILTIN_CMPGTPD,
29927 IX86_BUILTIN_CMPGEPD,
29928 IX86_BUILTIN_CMPNEQPD,
29929 IX86_BUILTIN_CMPNLTPD,
29930 IX86_BUILTIN_CMPNLEPD,
29931 IX86_BUILTIN_CMPNGTPD,
29932 IX86_BUILTIN_CMPNGEPD,
29933 IX86_BUILTIN_CMPORDPD,
29934 IX86_BUILTIN_CMPUNORDPD,
29935 IX86_BUILTIN_CMPEQSD,
29936 IX86_BUILTIN_CMPLTSD,
29937 IX86_BUILTIN_CMPLESD,
29938 IX86_BUILTIN_CMPNEQSD,
29939 IX86_BUILTIN_CMPNLTSD,
29940 IX86_BUILTIN_CMPNLESD,
29941 IX86_BUILTIN_CMPORDSD,
29942 IX86_BUILTIN_CMPUNORDSD,
29944 IX86_BUILTIN_COMIEQSD,
29945 IX86_BUILTIN_COMILTSD,
29946 IX86_BUILTIN_COMILESD,
29947 IX86_BUILTIN_COMIGTSD,
29948 IX86_BUILTIN_COMIGESD,
29949 IX86_BUILTIN_COMINEQSD,
29950 IX86_BUILTIN_UCOMIEQSD,
29951 IX86_BUILTIN_UCOMILTSD,
29952 IX86_BUILTIN_UCOMILESD,
29953 IX86_BUILTIN_UCOMIGTSD,
29954 IX86_BUILTIN_UCOMIGESD,
29955 IX86_BUILTIN_UCOMINEQSD,
29957 IX86_BUILTIN_MAXPD,
29958 IX86_BUILTIN_MAXSD,
29959 IX86_BUILTIN_MINPD,
29960 IX86_BUILTIN_MINSD,
29962 IX86_BUILTIN_ANDPD,
29963 IX86_BUILTIN_ANDNPD,
29965 IX86_BUILTIN_XORPD,
29967 IX86_BUILTIN_SQRTPD,
29968 IX86_BUILTIN_SQRTSD,
29970 IX86_BUILTIN_UNPCKHPD,
29971 IX86_BUILTIN_UNPCKLPD,
29973 IX86_BUILTIN_SHUFPD,
29975 IX86_BUILTIN_LOADUPD,
29976 IX86_BUILTIN_STOREUPD,
29977 IX86_BUILTIN_MOVSD,
29979 IX86_BUILTIN_LOADHPD,
29980 IX86_BUILTIN_LOADLPD,
29982 IX86_BUILTIN_CVTDQ2PD,
29983 IX86_BUILTIN_CVTDQ2PS,
29985 IX86_BUILTIN_CVTPD2DQ,
29986 IX86_BUILTIN_CVTPD2PI,
29987 IX86_BUILTIN_CVTPD2PS,
29988 IX86_BUILTIN_CVTTPD2DQ,
29989 IX86_BUILTIN_CVTTPD2PI,
29991 IX86_BUILTIN_CVTPI2PD,
29992 IX86_BUILTIN_CVTSI2SD,
29993 IX86_BUILTIN_CVTSI642SD,
29995 IX86_BUILTIN_CVTSD2SI,
29996 IX86_BUILTIN_CVTSD2SI64,
29997 IX86_BUILTIN_CVTSD2SS,
29998 IX86_BUILTIN_CVTSS2SD,
29999 IX86_BUILTIN_CVTTSD2SI,
30000 IX86_BUILTIN_CVTTSD2SI64,
30002 IX86_BUILTIN_CVTPS2DQ,
30003 IX86_BUILTIN_CVTPS2PD,
30004 IX86_BUILTIN_CVTTPS2DQ,
30006 IX86_BUILTIN_MOVNTI,
30007 IX86_BUILTIN_MOVNTI64,
30008 IX86_BUILTIN_MOVNTPD,
30009 IX86_BUILTIN_MOVNTDQ,
30011 IX86_BUILTIN_MOVQ128,
30014 IX86_BUILTIN_MASKMOVDQU,
30015 IX86_BUILTIN_MOVMSKPD,
30016 IX86_BUILTIN_PMOVMSKB128,
30018 IX86_BUILTIN_PACKSSWB128,
30019 IX86_BUILTIN_PACKSSDW128,
30020 IX86_BUILTIN_PACKUSWB128,
30022 IX86_BUILTIN_PADDB128,
30023 IX86_BUILTIN_PADDW128,
30024 IX86_BUILTIN_PADDD128,
30025 IX86_BUILTIN_PADDQ128,
30026 IX86_BUILTIN_PADDSB128,
30027 IX86_BUILTIN_PADDSW128,
30028 IX86_BUILTIN_PADDUSB128,
30029 IX86_BUILTIN_PADDUSW128,
30030 IX86_BUILTIN_PSUBB128,
30031 IX86_BUILTIN_PSUBW128,
30032 IX86_BUILTIN_PSUBD128,
30033 IX86_BUILTIN_PSUBQ128,
30034 IX86_BUILTIN_PSUBSB128,
30035 IX86_BUILTIN_PSUBSW128,
30036 IX86_BUILTIN_PSUBUSB128,
30037 IX86_BUILTIN_PSUBUSW128,
30039 IX86_BUILTIN_PAND128,
30040 IX86_BUILTIN_PANDN128,
30041 IX86_BUILTIN_POR128,
30042 IX86_BUILTIN_PXOR128,
30044 IX86_BUILTIN_PAVGB128,
30045 IX86_BUILTIN_PAVGW128,
30047 IX86_BUILTIN_PCMPEQB128,
30048 IX86_BUILTIN_PCMPEQW128,
30049 IX86_BUILTIN_PCMPEQD128,
30050 IX86_BUILTIN_PCMPGTB128,
30051 IX86_BUILTIN_PCMPGTW128,
30052 IX86_BUILTIN_PCMPGTD128,
30054 IX86_BUILTIN_PMADDWD128,
30056 IX86_BUILTIN_PMAXSW128,
30057 IX86_BUILTIN_PMAXUB128,
30058 IX86_BUILTIN_PMINSW128,
30059 IX86_BUILTIN_PMINUB128,
30061 IX86_BUILTIN_PMULUDQ,
30062 IX86_BUILTIN_PMULUDQ128,
30063 IX86_BUILTIN_PMULHUW128,
30064 IX86_BUILTIN_PMULHW128,
30065 IX86_BUILTIN_PMULLW128,
30067 IX86_BUILTIN_PSADBW128,
30068 IX86_BUILTIN_PSHUFHW,
30069 IX86_BUILTIN_PSHUFLW,
30070 IX86_BUILTIN_PSHUFD,
30072 IX86_BUILTIN_PSLLDQI128,
30073 IX86_BUILTIN_PSLLWI128,
30074 IX86_BUILTIN_PSLLDI128,
30075 IX86_BUILTIN_PSLLQI128,
30076 IX86_BUILTIN_PSRAWI128,
30077 IX86_BUILTIN_PSRADI128,
30078 IX86_BUILTIN_PSRLDQI128,
30079 IX86_BUILTIN_PSRLWI128,
30080 IX86_BUILTIN_PSRLDI128,
30081 IX86_BUILTIN_PSRLQI128,
30083 IX86_BUILTIN_PSLLDQ128,
30084 IX86_BUILTIN_PSLLW128,
30085 IX86_BUILTIN_PSLLD128,
30086 IX86_BUILTIN_PSLLQ128,
30087 IX86_BUILTIN_PSRAW128,
30088 IX86_BUILTIN_PSRAD128,
30089 IX86_BUILTIN_PSRLW128,
30090 IX86_BUILTIN_PSRLD128,
30091 IX86_BUILTIN_PSRLQ128,
30093 IX86_BUILTIN_PUNPCKHBW128,
30094 IX86_BUILTIN_PUNPCKHWD128,
30095 IX86_BUILTIN_PUNPCKHDQ128,
30096 IX86_BUILTIN_PUNPCKHQDQ128,
30097 IX86_BUILTIN_PUNPCKLBW128,
30098 IX86_BUILTIN_PUNPCKLWD128,
30099 IX86_BUILTIN_PUNPCKLDQ128,
30100 IX86_BUILTIN_PUNPCKLQDQ128,
30102 IX86_BUILTIN_CLFLUSH,
30103 IX86_BUILTIN_MFENCE,
30104 IX86_BUILTIN_LFENCE,
30105 IX86_BUILTIN_PAUSE,
30107 IX86_BUILTIN_FNSTENV,
30108 IX86_BUILTIN_FLDENV,
30109 IX86_BUILTIN_FNSTSW,
30110 IX86_BUILTIN_FNCLEX,
30112 IX86_BUILTIN_BSRSI,
30113 IX86_BUILTIN_BSRDI,
30114 IX86_BUILTIN_RDPMC,
30115 IX86_BUILTIN_RDTSC,
30116 IX86_BUILTIN_RDTSCP,
30117 IX86_BUILTIN_ROLQI,
30118 IX86_BUILTIN_ROLHI,
30119 IX86_BUILTIN_RORQI,
30120 IX86_BUILTIN_RORHI,
30123 IX86_BUILTIN_ADDSUBPS,
30124 IX86_BUILTIN_HADDPS,
30125 IX86_BUILTIN_HSUBPS,
30126 IX86_BUILTIN_MOVSHDUP,
30127 IX86_BUILTIN_MOVSLDUP,
30128 IX86_BUILTIN_ADDSUBPD,
30129 IX86_BUILTIN_HADDPD,
30130 IX86_BUILTIN_HSUBPD,
30131 IX86_BUILTIN_LDDQU,
30133 IX86_BUILTIN_MONITOR,
30134 IX86_BUILTIN_MWAIT,
30135 IX86_BUILTIN_CLZERO,
30138 IX86_BUILTIN_PHADDW,
30139 IX86_BUILTIN_PHADDD,
30140 IX86_BUILTIN_PHADDSW,
30141 IX86_BUILTIN_PHSUBW,
30142 IX86_BUILTIN_PHSUBD,
30143 IX86_BUILTIN_PHSUBSW,
30144 IX86_BUILTIN_PMADDUBSW,
30145 IX86_BUILTIN_PMULHRSW,
30146 IX86_BUILTIN_PSHUFB,
30147 IX86_BUILTIN_PSIGNB,
30148 IX86_BUILTIN_PSIGNW,
30149 IX86_BUILTIN_PSIGND,
30150 IX86_BUILTIN_PALIGNR,
30151 IX86_BUILTIN_PABSB,
30152 IX86_BUILTIN_PABSW,
30153 IX86_BUILTIN_PABSD,
30155 IX86_BUILTIN_PHADDW128,
30156 IX86_BUILTIN_PHADDD128,
30157 IX86_BUILTIN_PHADDSW128,
30158 IX86_BUILTIN_PHSUBW128,
30159 IX86_BUILTIN_PHSUBD128,
30160 IX86_BUILTIN_PHSUBSW128,
30161 IX86_BUILTIN_PMADDUBSW128,
30162 IX86_BUILTIN_PMULHRSW128,
30163 IX86_BUILTIN_PSHUFB128,
30164 IX86_BUILTIN_PSIGNB128,
30165 IX86_BUILTIN_PSIGNW128,
30166 IX86_BUILTIN_PSIGND128,
30167 IX86_BUILTIN_PALIGNR128,
30168 IX86_BUILTIN_PABSB128,
30169 IX86_BUILTIN_PABSW128,
30170 IX86_BUILTIN_PABSD128,
30172 /* AMDFAM10 - SSE4A New Instructions. */
30173 IX86_BUILTIN_MOVNTSD,
30174 IX86_BUILTIN_MOVNTSS,
30175 IX86_BUILTIN_EXTRQI,
30176 IX86_BUILTIN_EXTRQ,
30177 IX86_BUILTIN_INSERTQI,
30178 IX86_BUILTIN_INSERTQ,
30181 IX86_BUILTIN_BLENDPD,
30182 IX86_BUILTIN_BLENDPS,
30183 IX86_BUILTIN_BLENDVPD,
30184 IX86_BUILTIN_BLENDVPS,
30185 IX86_BUILTIN_PBLENDVB128,
30186 IX86_BUILTIN_PBLENDW128,
30191 IX86_BUILTIN_INSERTPS128,
30193 IX86_BUILTIN_MOVNTDQA,
30194 IX86_BUILTIN_MPSADBW128,
30195 IX86_BUILTIN_PACKUSDW128,
30196 IX86_BUILTIN_PCMPEQQ,
30197 IX86_BUILTIN_PHMINPOSUW128,
30199 IX86_BUILTIN_PMAXSB128,
30200 IX86_BUILTIN_PMAXSD128,
30201 IX86_BUILTIN_PMAXUD128,
30202 IX86_BUILTIN_PMAXUW128,
30204 IX86_BUILTIN_PMINSB128,
30205 IX86_BUILTIN_PMINSD128,
30206 IX86_BUILTIN_PMINUD128,
30207 IX86_BUILTIN_PMINUW128,
30209 IX86_BUILTIN_PMOVSXBW128,
30210 IX86_BUILTIN_PMOVSXBD128,
30211 IX86_BUILTIN_PMOVSXBQ128,
30212 IX86_BUILTIN_PMOVSXWD128,
30213 IX86_BUILTIN_PMOVSXWQ128,
30214 IX86_BUILTIN_PMOVSXDQ128,
30216 IX86_BUILTIN_PMOVZXBW128,
30217 IX86_BUILTIN_PMOVZXBD128,
30218 IX86_BUILTIN_PMOVZXBQ128,
30219 IX86_BUILTIN_PMOVZXWD128,
30220 IX86_BUILTIN_PMOVZXWQ128,
30221 IX86_BUILTIN_PMOVZXDQ128,
30223 IX86_BUILTIN_PMULDQ128,
30224 IX86_BUILTIN_PMULLD128,
30226 IX86_BUILTIN_ROUNDSD,
30227 IX86_BUILTIN_ROUNDSS,
30229 IX86_BUILTIN_ROUNDPD,
30230 IX86_BUILTIN_ROUNDPS,
30232 IX86_BUILTIN_FLOORPD,
30233 IX86_BUILTIN_CEILPD,
30234 IX86_BUILTIN_TRUNCPD,
30235 IX86_BUILTIN_RINTPD,
30236 IX86_BUILTIN_ROUNDPD_AZ,
30238 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
30239 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
30240 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
30242 IX86_BUILTIN_FLOORPS,
30243 IX86_BUILTIN_CEILPS,
30244 IX86_BUILTIN_TRUNCPS,
30245 IX86_BUILTIN_RINTPS,
30246 IX86_BUILTIN_ROUNDPS_AZ,
30248 IX86_BUILTIN_FLOORPS_SFIX,
30249 IX86_BUILTIN_CEILPS_SFIX,
30250 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
30252 IX86_BUILTIN_PTESTZ,
30253 IX86_BUILTIN_PTESTC,
30254 IX86_BUILTIN_PTESTNZC,
30256 IX86_BUILTIN_VEC_INIT_V2SI,
30257 IX86_BUILTIN_VEC_INIT_V4HI,
30258 IX86_BUILTIN_VEC_INIT_V8QI,
30259 IX86_BUILTIN_VEC_EXT_V2DF,
30260 IX86_BUILTIN_VEC_EXT_V2DI,
30261 IX86_BUILTIN_VEC_EXT_V4SF,
30262 IX86_BUILTIN_VEC_EXT_V4SI,
30263 IX86_BUILTIN_VEC_EXT_V8HI,
30264 IX86_BUILTIN_VEC_EXT_V2SI,
30265 IX86_BUILTIN_VEC_EXT_V4HI,
30266 IX86_BUILTIN_VEC_EXT_V16QI,
30267 IX86_BUILTIN_VEC_SET_V2DI,
30268 IX86_BUILTIN_VEC_SET_V4SF,
30269 IX86_BUILTIN_VEC_SET_V4SI,
30270 IX86_BUILTIN_VEC_SET_V8HI,
30271 IX86_BUILTIN_VEC_SET_V4HI,
30272 IX86_BUILTIN_VEC_SET_V16QI,
30274 IX86_BUILTIN_VEC_PACK_SFIX,
30275 IX86_BUILTIN_VEC_PACK_SFIX256,
30278 IX86_BUILTIN_CRC32QI,
30279 IX86_BUILTIN_CRC32HI,
30280 IX86_BUILTIN_CRC32SI,
30281 IX86_BUILTIN_CRC32DI,
30283 IX86_BUILTIN_PCMPESTRI128,
30284 IX86_BUILTIN_PCMPESTRM128,
30285 IX86_BUILTIN_PCMPESTRA128,
30286 IX86_BUILTIN_PCMPESTRC128,
30287 IX86_BUILTIN_PCMPESTRO128,
30288 IX86_BUILTIN_PCMPESTRS128,
30289 IX86_BUILTIN_PCMPESTRZ128,
30290 IX86_BUILTIN_PCMPISTRI128,
30291 IX86_BUILTIN_PCMPISTRM128,
30292 IX86_BUILTIN_PCMPISTRA128,
30293 IX86_BUILTIN_PCMPISTRC128,
30294 IX86_BUILTIN_PCMPISTRO128,
30295 IX86_BUILTIN_PCMPISTRS128,
30296 IX86_BUILTIN_PCMPISTRZ128,
30298 IX86_BUILTIN_PCMPGTQ,
30300 /* AES instructions */
30301 IX86_BUILTIN_AESENC128,
30302 IX86_BUILTIN_AESENCLAST128,
30303 IX86_BUILTIN_AESDEC128,
30304 IX86_BUILTIN_AESDECLAST128,
30305 IX86_BUILTIN_AESIMC128,
30306 IX86_BUILTIN_AESKEYGENASSIST128,
30308 /* PCLMUL instruction */
30309 IX86_BUILTIN_PCLMULQDQ128,
30312 IX86_BUILTIN_ADDPD256,
30313 IX86_BUILTIN_ADDPS256,
30314 IX86_BUILTIN_ADDSUBPD256,
30315 IX86_BUILTIN_ADDSUBPS256,
30316 IX86_BUILTIN_ANDPD256,
30317 IX86_BUILTIN_ANDPS256,
30318 IX86_BUILTIN_ANDNPD256,
30319 IX86_BUILTIN_ANDNPS256,
30320 IX86_BUILTIN_BLENDPD256,
30321 IX86_BUILTIN_BLENDPS256,
30322 IX86_BUILTIN_BLENDVPD256,
30323 IX86_BUILTIN_BLENDVPS256,
30324 IX86_BUILTIN_DIVPD256,
30325 IX86_BUILTIN_DIVPS256,
30326 IX86_BUILTIN_DPPS256,
30327 IX86_BUILTIN_HADDPD256,
30328 IX86_BUILTIN_HADDPS256,
30329 IX86_BUILTIN_HSUBPD256,
30330 IX86_BUILTIN_HSUBPS256,
30331 IX86_BUILTIN_MAXPD256,
30332 IX86_BUILTIN_MAXPS256,
30333 IX86_BUILTIN_MINPD256,
30334 IX86_BUILTIN_MINPS256,
30335 IX86_BUILTIN_MULPD256,
30336 IX86_BUILTIN_MULPS256,
30337 IX86_BUILTIN_ORPD256,
30338 IX86_BUILTIN_ORPS256,
30339 IX86_BUILTIN_SHUFPD256,
30340 IX86_BUILTIN_SHUFPS256,
30341 IX86_BUILTIN_SUBPD256,
30342 IX86_BUILTIN_SUBPS256,
30343 IX86_BUILTIN_XORPD256,
30344 IX86_BUILTIN_XORPS256,
30345 IX86_BUILTIN_CMPSD,
30346 IX86_BUILTIN_CMPSS,
30347 IX86_BUILTIN_CMPPD,
30348 IX86_BUILTIN_CMPPS,
30349 IX86_BUILTIN_CMPPD256,
30350 IX86_BUILTIN_CMPPS256,
30351 IX86_BUILTIN_CVTDQ2PD256,
30352 IX86_BUILTIN_CVTDQ2PS256,
30353 IX86_BUILTIN_CVTPD2PS256,
30354 IX86_BUILTIN_CVTPS2DQ256,
30355 IX86_BUILTIN_CVTPS2PD256,
30356 IX86_BUILTIN_CVTTPD2DQ256,
30357 IX86_BUILTIN_CVTPD2DQ256,
30358 IX86_BUILTIN_CVTTPS2DQ256,
30359 IX86_BUILTIN_EXTRACTF128PD256,
30360 IX86_BUILTIN_EXTRACTF128PS256,
30361 IX86_BUILTIN_EXTRACTF128SI256,
30362 IX86_BUILTIN_VZEROALL,
30363 IX86_BUILTIN_VZEROUPPER,
30364 IX86_BUILTIN_VPERMILVARPD,
30365 IX86_BUILTIN_VPERMILVARPS,
30366 IX86_BUILTIN_VPERMILVARPD256,
30367 IX86_BUILTIN_VPERMILVARPS256,
30368 IX86_BUILTIN_VPERMILPD,
30369 IX86_BUILTIN_VPERMILPS,
30370 IX86_BUILTIN_VPERMILPD256,
30371 IX86_BUILTIN_VPERMILPS256,
30372 IX86_BUILTIN_VPERMIL2PD,
30373 IX86_BUILTIN_VPERMIL2PS,
30374 IX86_BUILTIN_VPERMIL2PD256,
30375 IX86_BUILTIN_VPERMIL2PS256,
30376 IX86_BUILTIN_VPERM2F128PD256,
30377 IX86_BUILTIN_VPERM2F128PS256,
30378 IX86_BUILTIN_VPERM2F128SI256,
30379 IX86_BUILTIN_VBROADCASTSS,
30380 IX86_BUILTIN_VBROADCASTSD256,
30381 IX86_BUILTIN_VBROADCASTSS256,
30382 IX86_BUILTIN_VBROADCASTPD256,
30383 IX86_BUILTIN_VBROADCASTPS256,
30384 IX86_BUILTIN_VINSERTF128PD256,
30385 IX86_BUILTIN_VINSERTF128PS256,
30386 IX86_BUILTIN_VINSERTF128SI256,
30387 IX86_BUILTIN_LOADUPD256,
30388 IX86_BUILTIN_LOADUPS256,
30389 IX86_BUILTIN_STOREUPD256,
30390 IX86_BUILTIN_STOREUPS256,
30391 IX86_BUILTIN_LDDQU256,
30392 IX86_BUILTIN_MOVNTDQ256,
30393 IX86_BUILTIN_MOVNTPD256,
30394 IX86_BUILTIN_MOVNTPS256,
30395 IX86_BUILTIN_LOADDQU256,
30396 IX86_BUILTIN_STOREDQU256,
30397 IX86_BUILTIN_MASKLOADPD,
30398 IX86_BUILTIN_MASKLOADPS,
30399 IX86_BUILTIN_MASKSTOREPD,
30400 IX86_BUILTIN_MASKSTOREPS,
30401 IX86_BUILTIN_MASKLOADPD256,
30402 IX86_BUILTIN_MASKLOADPS256,
30403 IX86_BUILTIN_MASKSTOREPD256,
30404 IX86_BUILTIN_MASKSTOREPS256,
30405 IX86_BUILTIN_MOVSHDUP256,
30406 IX86_BUILTIN_MOVSLDUP256,
30407 IX86_BUILTIN_MOVDDUP256,
30409 IX86_BUILTIN_SQRTPD256,
30410 IX86_BUILTIN_SQRTPS256,
30411 IX86_BUILTIN_SQRTPS_NR256,
30412 IX86_BUILTIN_RSQRTPS256,
30413 IX86_BUILTIN_RSQRTPS_NR256,
30415 IX86_BUILTIN_RCPPS256,
30417 IX86_BUILTIN_ROUNDPD256,
30418 IX86_BUILTIN_ROUNDPS256,
30420 IX86_BUILTIN_FLOORPD256,
30421 IX86_BUILTIN_CEILPD256,
30422 IX86_BUILTIN_TRUNCPD256,
30423 IX86_BUILTIN_RINTPD256,
30424 IX86_BUILTIN_ROUNDPD_AZ256,
30426 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
30427 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
30428 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
30430 IX86_BUILTIN_FLOORPS256,
30431 IX86_BUILTIN_CEILPS256,
30432 IX86_BUILTIN_TRUNCPS256,
30433 IX86_BUILTIN_RINTPS256,
30434 IX86_BUILTIN_ROUNDPS_AZ256,
30436 IX86_BUILTIN_FLOORPS_SFIX256,
30437 IX86_BUILTIN_CEILPS_SFIX256,
30438 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
30440 IX86_BUILTIN_UNPCKHPD256,
30441 IX86_BUILTIN_UNPCKLPD256,
30442 IX86_BUILTIN_UNPCKHPS256,
30443 IX86_BUILTIN_UNPCKLPS256,
30445 IX86_BUILTIN_SI256_SI,
30446 IX86_BUILTIN_PS256_PS,
30447 IX86_BUILTIN_PD256_PD,
30448 IX86_BUILTIN_SI_SI256,
30449 IX86_BUILTIN_PS_PS256,
30450 IX86_BUILTIN_PD_PD256,
30452 IX86_BUILTIN_VTESTZPD,
30453 IX86_BUILTIN_VTESTCPD,
30454 IX86_BUILTIN_VTESTNZCPD,
30455 IX86_BUILTIN_VTESTZPS,
30456 IX86_BUILTIN_VTESTCPS,
30457 IX86_BUILTIN_VTESTNZCPS,
30458 IX86_BUILTIN_VTESTZPD256,
30459 IX86_BUILTIN_VTESTCPD256,
30460 IX86_BUILTIN_VTESTNZCPD256,
30461 IX86_BUILTIN_VTESTZPS256,
30462 IX86_BUILTIN_VTESTCPS256,
30463 IX86_BUILTIN_VTESTNZCPS256,
30464 IX86_BUILTIN_PTESTZ256,
30465 IX86_BUILTIN_PTESTC256,
30466 IX86_BUILTIN_PTESTNZC256,
30468 IX86_BUILTIN_MOVMSKPD256,
30469 IX86_BUILTIN_MOVMSKPS256,
30472 IX86_BUILTIN_MPSADBW256,
30473 IX86_BUILTIN_PABSB256,
30474 IX86_BUILTIN_PABSW256,
30475 IX86_BUILTIN_PABSD256,
30476 IX86_BUILTIN_PACKSSDW256,
30477 IX86_BUILTIN_PACKSSWB256,
30478 IX86_BUILTIN_PACKUSDW256,
30479 IX86_BUILTIN_PACKUSWB256,
30480 IX86_BUILTIN_PADDB256,
30481 IX86_BUILTIN_PADDW256,
30482 IX86_BUILTIN_PADDD256,
30483 IX86_BUILTIN_PADDQ256,
30484 IX86_BUILTIN_PADDSB256,
30485 IX86_BUILTIN_PADDSW256,
30486 IX86_BUILTIN_PADDUSB256,
30487 IX86_BUILTIN_PADDUSW256,
30488 IX86_BUILTIN_PALIGNR256,
30489 IX86_BUILTIN_AND256I,
30490 IX86_BUILTIN_ANDNOT256I,
30491 IX86_BUILTIN_PAVGB256,
30492 IX86_BUILTIN_PAVGW256,
30493 IX86_BUILTIN_PBLENDVB256,
30494 IX86_BUILTIN_PBLENDVW256,
30495 IX86_BUILTIN_PCMPEQB256,
30496 IX86_BUILTIN_PCMPEQW256,
30497 IX86_BUILTIN_PCMPEQD256,
30498 IX86_BUILTIN_PCMPEQQ256,
30499 IX86_BUILTIN_PCMPGTB256,
30500 IX86_BUILTIN_PCMPGTW256,
30501 IX86_BUILTIN_PCMPGTD256,
30502 IX86_BUILTIN_PCMPGTQ256,
30503 IX86_BUILTIN_PHADDW256,
30504 IX86_BUILTIN_PHADDD256,
30505 IX86_BUILTIN_PHADDSW256,
30506 IX86_BUILTIN_PHSUBW256,
30507 IX86_BUILTIN_PHSUBD256,
30508 IX86_BUILTIN_PHSUBSW256,
30509 IX86_BUILTIN_PMADDUBSW256,
30510 IX86_BUILTIN_PMADDWD256,
30511 IX86_BUILTIN_PMAXSB256,
30512 IX86_BUILTIN_PMAXSW256,
30513 IX86_BUILTIN_PMAXSD256,
30514 IX86_BUILTIN_PMAXUB256,
30515 IX86_BUILTIN_PMAXUW256,
30516 IX86_BUILTIN_PMAXUD256,
30517 IX86_BUILTIN_PMINSB256,
30518 IX86_BUILTIN_PMINSW256,
30519 IX86_BUILTIN_PMINSD256,
30520 IX86_BUILTIN_PMINUB256,
30521 IX86_BUILTIN_PMINUW256,
30522 IX86_BUILTIN_PMINUD256,
30523 IX86_BUILTIN_PMOVMSKB256,
30524 IX86_BUILTIN_PMOVSXBW256,
30525 IX86_BUILTIN_PMOVSXBD256,
30526 IX86_BUILTIN_PMOVSXBQ256,
30527 IX86_BUILTIN_PMOVSXWD256,
30528 IX86_BUILTIN_PMOVSXWQ256,
30529 IX86_BUILTIN_PMOVSXDQ256,
30530 IX86_BUILTIN_PMOVZXBW256,
30531 IX86_BUILTIN_PMOVZXBD256,
30532 IX86_BUILTIN_PMOVZXBQ256,
30533 IX86_BUILTIN_PMOVZXWD256,
30534 IX86_BUILTIN_PMOVZXWQ256,
30535 IX86_BUILTIN_PMOVZXDQ256,
30536 IX86_BUILTIN_PMULDQ256,
30537 IX86_BUILTIN_PMULHRSW256,
30538 IX86_BUILTIN_PMULHUW256,
30539 IX86_BUILTIN_PMULHW256,
30540 IX86_BUILTIN_PMULLW256,
30541 IX86_BUILTIN_PMULLD256,
30542 IX86_BUILTIN_PMULUDQ256,
30543 IX86_BUILTIN_POR256,
30544 IX86_BUILTIN_PSADBW256,
30545 IX86_BUILTIN_PSHUFB256,
30546 IX86_BUILTIN_PSHUFD256,
30547 IX86_BUILTIN_PSHUFHW256,
30548 IX86_BUILTIN_PSHUFLW256,
30549 IX86_BUILTIN_PSIGNB256,
30550 IX86_BUILTIN_PSIGNW256,
30551 IX86_BUILTIN_PSIGND256,
30552 IX86_BUILTIN_PSLLDQI256,
30553 IX86_BUILTIN_PSLLWI256,
30554 IX86_BUILTIN_PSLLW256,
30555 IX86_BUILTIN_PSLLDI256,
30556 IX86_BUILTIN_PSLLD256,
30557 IX86_BUILTIN_PSLLQI256,
30558 IX86_BUILTIN_PSLLQ256,
30559 IX86_BUILTIN_PSRAWI256,
30560 IX86_BUILTIN_PSRAW256,
30561 IX86_BUILTIN_PSRADI256,
30562 IX86_BUILTIN_PSRAD256,
30563 IX86_BUILTIN_PSRLDQI256,
30564 IX86_BUILTIN_PSRLWI256,
30565 IX86_BUILTIN_PSRLW256,
30566 IX86_BUILTIN_PSRLDI256,
30567 IX86_BUILTIN_PSRLD256,
30568 IX86_BUILTIN_PSRLQI256,
30569 IX86_BUILTIN_PSRLQ256,
30570 IX86_BUILTIN_PSUBB256,
30571 IX86_BUILTIN_PSUBW256,
30572 IX86_BUILTIN_PSUBD256,
30573 IX86_BUILTIN_PSUBQ256,
30574 IX86_BUILTIN_PSUBSB256,
30575 IX86_BUILTIN_PSUBSW256,
30576 IX86_BUILTIN_PSUBUSB256,
30577 IX86_BUILTIN_PSUBUSW256,
30578 IX86_BUILTIN_PUNPCKHBW256,
30579 IX86_BUILTIN_PUNPCKHWD256,
30580 IX86_BUILTIN_PUNPCKHDQ256,
30581 IX86_BUILTIN_PUNPCKHQDQ256,
30582 IX86_BUILTIN_PUNPCKLBW256,
30583 IX86_BUILTIN_PUNPCKLWD256,
30584 IX86_BUILTIN_PUNPCKLDQ256,
30585 IX86_BUILTIN_PUNPCKLQDQ256,
30586 IX86_BUILTIN_PXOR256,
30587 IX86_BUILTIN_MOVNTDQA256,
30588 IX86_BUILTIN_VBROADCASTSS_PS,
30589 IX86_BUILTIN_VBROADCASTSS_PS256,
30590 IX86_BUILTIN_VBROADCASTSD_PD256,
30591 IX86_BUILTIN_VBROADCASTSI256,
30592 IX86_BUILTIN_PBLENDD256,
30593 IX86_BUILTIN_PBLENDD128,
30594 IX86_BUILTIN_PBROADCASTB256,
30595 IX86_BUILTIN_PBROADCASTW256,
30596 IX86_BUILTIN_PBROADCASTD256,
30597 IX86_BUILTIN_PBROADCASTQ256,
30598 IX86_BUILTIN_PBROADCASTB128,
30599 IX86_BUILTIN_PBROADCASTW128,
30600 IX86_BUILTIN_PBROADCASTD128,
30601 IX86_BUILTIN_PBROADCASTQ128,
30602 IX86_BUILTIN_VPERMVARSI256,
30603 IX86_BUILTIN_VPERMDF256,
30604 IX86_BUILTIN_VPERMVARSF256,
30605 IX86_BUILTIN_VPERMDI256,
30606 IX86_BUILTIN_VPERMTI256,
30607 IX86_BUILTIN_VEXTRACT128I256,
30608 IX86_BUILTIN_VINSERT128I256,
30609 IX86_BUILTIN_MASKLOADD,
30610 IX86_BUILTIN_MASKLOADQ,
30611 IX86_BUILTIN_MASKLOADD256,
30612 IX86_BUILTIN_MASKLOADQ256,
30613 IX86_BUILTIN_MASKSTORED,
30614 IX86_BUILTIN_MASKSTOREQ,
30615 IX86_BUILTIN_MASKSTORED256,
30616 IX86_BUILTIN_MASKSTOREQ256,
30617 IX86_BUILTIN_PSLLVV4DI,
30618 IX86_BUILTIN_PSLLVV2DI,
30619 IX86_BUILTIN_PSLLVV8SI,
30620 IX86_BUILTIN_PSLLVV4SI,
30621 IX86_BUILTIN_PSRAVV8SI,
30622 IX86_BUILTIN_PSRAVV4SI,
30623 IX86_BUILTIN_PSRLVV4DI,
30624 IX86_BUILTIN_PSRLVV2DI,
30625 IX86_BUILTIN_PSRLVV8SI,
30626 IX86_BUILTIN_PSRLVV4SI,
30628 IX86_BUILTIN_GATHERSIV2DF,
30629 IX86_BUILTIN_GATHERSIV4DF,
30630 IX86_BUILTIN_GATHERDIV2DF,
30631 IX86_BUILTIN_GATHERDIV4DF,
30632 IX86_BUILTIN_GATHERSIV4SF,
30633 IX86_BUILTIN_GATHERSIV8SF,
30634 IX86_BUILTIN_GATHERDIV4SF,
30635 IX86_BUILTIN_GATHERDIV8SF,
30636 IX86_BUILTIN_GATHERSIV2DI,
30637 IX86_BUILTIN_GATHERSIV4DI,
30638 IX86_BUILTIN_GATHERDIV2DI,
30639 IX86_BUILTIN_GATHERDIV4DI,
30640 IX86_BUILTIN_GATHERSIV4SI,
30641 IX86_BUILTIN_GATHERSIV8SI,
30642 IX86_BUILTIN_GATHERDIV4SI,
30643 IX86_BUILTIN_GATHERDIV8SI,
30646 IX86_BUILTIN_SI512_SI256,
30647 IX86_BUILTIN_PD512_PD256,
30648 IX86_BUILTIN_PS512_PS256,
30649 IX86_BUILTIN_SI512_SI,
30650 IX86_BUILTIN_PD512_PD,
30651 IX86_BUILTIN_PS512_PS,
30652 IX86_BUILTIN_ADDPD512,
30653 IX86_BUILTIN_ADDPS512,
30654 IX86_BUILTIN_ADDSD_ROUND,
30655 IX86_BUILTIN_ADDSS_ROUND,
30656 IX86_BUILTIN_ALIGND512,
30657 IX86_BUILTIN_ALIGNQ512,
30658 IX86_BUILTIN_BLENDMD512,
30659 IX86_BUILTIN_BLENDMPD512,
30660 IX86_BUILTIN_BLENDMPS512,
30661 IX86_BUILTIN_BLENDMQ512,
30662 IX86_BUILTIN_BROADCASTF32X4_512,
30663 IX86_BUILTIN_BROADCASTF64X4_512,
30664 IX86_BUILTIN_BROADCASTI32X4_512,
30665 IX86_BUILTIN_BROADCASTI64X4_512,
30666 IX86_BUILTIN_BROADCASTSD512,
30667 IX86_BUILTIN_BROADCASTSS512,
30668 IX86_BUILTIN_CMPD512,
30669 IX86_BUILTIN_CMPPD512,
30670 IX86_BUILTIN_CMPPS512,
30671 IX86_BUILTIN_CMPQ512,
30672 IX86_BUILTIN_CMPSD_MASK,
30673 IX86_BUILTIN_CMPSS_MASK,
30674 IX86_BUILTIN_COMIDF,
30675 IX86_BUILTIN_COMISF,
30676 IX86_BUILTIN_COMPRESSPD512,
30677 IX86_BUILTIN_COMPRESSPDSTORE512,
30678 IX86_BUILTIN_COMPRESSPS512,
30679 IX86_BUILTIN_COMPRESSPSSTORE512,
30680 IX86_BUILTIN_CVTDQ2PD512,
30681 IX86_BUILTIN_CVTDQ2PS512,
30682 IX86_BUILTIN_CVTPD2DQ512,
30683 IX86_BUILTIN_CVTPD2PS512,
30684 IX86_BUILTIN_CVTPD2UDQ512,
30685 IX86_BUILTIN_CVTPH2PS512,
30686 IX86_BUILTIN_CVTPS2DQ512,
30687 IX86_BUILTIN_CVTPS2PD512,
30688 IX86_BUILTIN_CVTPS2PH512,
30689 IX86_BUILTIN_CVTPS2UDQ512,
30690 IX86_BUILTIN_CVTSD2SS_ROUND,
30691 IX86_BUILTIN_CVTSI2SD64,
30692 IX86_BUILTIN_CVTSI2SS32,
30693 IX86_BUILTIN_CVTSI2SS64,
30694 IX86_BUILTIN_CVTSS2SD_ROUND,
30695 IX86_BUILTIN_CVTTPD2DQ512,
30696 IX86_BUILTIN_CVTTPD2UDQ512,
30697 IX86_BUILTIN_CVTTPS2DQ512,
30698 IX86_BUILTIN_CVTTPS2UDQ512,
30699 IX86_BUILTIN_CVTUDQ2PD512,
30700 IX86_BUILTIN_CVTUDQ2PS512,
30701 IX86_BUILTIN_CVTUSI2SD32,
30702 IX86_BUILTIN_CVTUSI2SD64,
30703 IX86_BUILTIN_CVTUSI2SS32,
30704 IX86_BUILTIN_CVTUSI2SS64,
30705 IX86_BUILTIN_DIVPD512,
30706 IX86_BUILTIN_DIVPS512,
30707 IX86_BUILTIN_DIVSD_ROUND,
30708 IX86_BUILTIN_DIVSS_ROUND,
30709 IX86_BUILTIN_EXPANDPD512,
30710 IX86_BUILTIN_EXPANDPD512Z,
30711 IX86_BUILTIN_EXPANDPDLOAD512,
30712 IX86_BUILTIN_EXPANDPDLOAD512Z,
30713 IX86_BUILTIN_EXPANDPS512,
30714 IX86_BUILTIN_EXPANDPS512Z,
30715 IX86_BUILTIN_EXPANDPSLOAD512,
30716 IX86_BUILTIN_EXPANDPSLOAD512Z,
30717 IX86_BUILTIN_EXTRACTF32X4,
30718 IX86_BUILTIN_EXTRACTF64X4,
30719 IX86_BUILTIN_EXTRACTI32X4,
30720 IX86_BUILTIN_EXTRACTI64X4,
30721 IX86_BUILTIN_FIXUPIMMPD512_MASK,
30722 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
30723 IX86_BUILTIN_FIXUPIMMPS512_MASK,
30724 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
30725 IX86_BUILTIN_FIXUPIMMSD128_MASK,
30726 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
30727 IX86_BUILTIN_FIXUPIMMSS128_MASK,
30728 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
30729 IX86_BUILTIN_GETEXPPD512,
30730 IX86_BUILTIN_GETEXPPS512,
30731 IX86_BUILTIN_GETEXPSD128,
30732 IX86_BUILTIN_GETEXPSS128,
30733 IX86_BUILTIN_GETMANTPD512,
30734 IX86_BUILTIN_GETMANTPS512,
30735 IX86_BUILTIN_GETMANTSD128,
30736 IX86_BUILTIN_GETMANTSS128,
30737 IX86_BUILTIN_INSERTF32X4,
30738 IX86_BUILTIN_INSERTF64X4,
30739 IX86_BUILTIN_INSERTI32X4,
30740 IX86_BUILTIN_INSERTI64X4,
30741 IX86_BUILTIN_LOADAPD512,
30742 IX86_BUILTIN_LOADAPS512,
30743 IX86_BUILTIN_LOADDQUDI512,
30744 IX86_BUILTIN_LOADDQUSI512,
30745 IX86_BUILTIN_LOADUPD512,
30746 IX86_BUILTIN_LOADUPS512,
30747 IX86_BUILTIN_MAXPD512,
30748 IX86_BUILTIN_MAXPS512,
30749 IX86_BUILTIN_MAXSD_ROUND,
30750 IX86_BUILTIN_MAXSS_ROUND,
30751 IX86_BUILTIN_MINPD512,
30752 IX86_BUILTIN_MINPS512,
30753 IX86_BUILTIN_MINSD_ROUND,
30754 IX86_BUILTIN_MINSS_ROUND,
30755 IX86_BUILTIN_MOVAPD512,
30756 IX86_BUILTIN_MOVAPS512,
30757 IX86_BUILTIN_MOVDDUP512,
30758 IX86_BUILTIN_MOVDQA32LOAD512,
30759 IX86_BUILTIN_MOVDQA32STORE512,
30760 IX86_BUILTIN_MOVDQA32_512,
30761 IX86_BUILTIN_MOVDQA64LOAD512,
30762 IX86_BUILTIN_MOVDQA64STORE512,
30763 IX86_BUILTIN_MOVDQA64_512,
30764 IX86_BUILTIN_MOVNTDQ512,
30765 IX86_BUILTIN_MOVNTDQA512,
30766 IX86_BUILTIN_MOVNTPD512,
30767 IX86_BUILTIN_MOVNTPS512,
30768 IX86_BUILTIN_MOVSHDUP512,
30769 IX86_BUILTIN_MOVSLDUP512,
30770 IX86_BUILTIN_MULPD512,
30771 IX86_BUILTIN_MULPS512,
30772 IX86_BUILTIN_MULSD_ROUND,
30773 IX86_BUILTIN_MULSS_ROUND,
30774 IX86_BUILTIN_PABSD512,
30775 IX86_BUILTIN_PABSQ512,
30776 IX86_BUILTIN_PADDD512,
30777 IX86_BUILTIN_PADDQ512,
30778 IX86_BUILTIN_PANDD512,
30779 IX86_BUILTIN_PANDND512,
30780 IX86_BUILTIN_PANDNQ512,
30781 IX86_BUILTIN_PANDQ512,
30782 IX86_BUILTIN_PBROADCASTD512,
30783 IX86_BUILTIN_PBROADCASTD512_GPR,
30784 IX86_BUILTIN_PBROADCASTMB512,
30785 IX86_BUILTIN_PBROADCASTMW512,
30786 IX86_BUILTIN_PBROADCASTQ512,
30787 IX86_BUILTIN_PBROADCASTQ512_GPR,
30788 IX86_BUILTIN_PCMPEQD512_MASK,
30789 IX86_BUILTIN_PCMPEQQ512_MASK,
30790 IX86_BUILTIN_PCMPGTD512_MASK,
30791 IX86_BUILTIN_PCMPGTQ512_MASK,
30792 IX86_BUILTIN_PCOMPRESSD512,
30793 IX86_BUILTIN_PCOMPRESSDSTORE512,
30794 IX86_BUILTIN_PCOMPRESSQ512,
30795 IX86_BUILTIN_PCOMPRESSQSTORE512,
30796 IX86_BUILTIN_PEXPANDD512,
30797 IX86_BUILTIN_PEXPANDD512Z,
30798 IX86_BUILTIN_PEXPANDDLOAD512,
30799 IX86_BUILTIN_PEXPANDDLOAD512Z,
30800 IX86_BUILTIN_PEXPANDQ512,
30801 IX86_BUILTIN_PEXPANDQ512Z,
30802 IX86_BUILTIN_PEXPANDQLOAD512,
30803 IX86_BUILTIN_PEXPANDQLOAD512Z,
30804 IX86_BUILTIN_PMAXSD512,
30805 IX86_BUILTIN_PMAXSQ512,
30806 IX86_BUILTIN_PMAXUD512,
30807 IX86_BUILTIN_PMAXUQ512,
30808 IX86_BUILTIN_PMINSD512,
30809 IX86_BUILTIN_PMINSQ512,
30810 IX86_BUILTIN_PMINUD512,
30811 IX86_BUILTIN_PMINUQ512,
30812 IX86_BUILTIN_PMOVDB512,
30813 IX86_BUILTIN_PMOVDB512_MEM,
30814 IX86_BUILTIN_PMOVDW512,
30815 IX86_BUILTIN_PMOVDW512_MEM,
30816 IX86_BUILTIN_PMOVQB512,
30817 IX86_BUILTIN_PMOVQB512_MEM,
30818 IX86_BUILTIN_PMOVQD512,
30819 IX86_BUILTIN_PMOVQD512_MEM,
30820 IX86_BUILTIN_PMOVQW512,
30821 IX86_BUILTIN_PMOVQW512_MEM,
30822 IX86_BUILTIN_PMOVSDB512,
30823 IX86_BUILTIN_PMOVSDB512_MEM,
30824 IX86_BUILTIN_PMOVSDW512,
30825 IX86_BUILTIN_PMOVSDW512_MEM,
30826 IX86_BUILTIN_PMOVSQB512,
30827 IX86_BUILTIN_PMOVSQB512_MEM,
30828 IX86_BUILTIN_PMOVSQD512,
30829 IX86_BUILTIN_PMOVSQD512_MEM,
30830 IX86_BUILTIN_PMOVSQW512,
30831 IX86_BUILTIN_PMOVSQW512_MEM,
30832 IX86_BUILTIN_PMOVSXBD512,
30833 IX86_BUILTIN_PMOVSXBQ512,
30834 IX86_BUILTIN_PMOVSXDQ512,
30835 IX86_BUILTIN_PMOVSXWD512,
30836 IX86_BUILTIN_PMOVSXWQ512,
30837 IX86_BUILTIN_PMOVUSDB512,
30838 IX86_BUILTIN_PMOVUSDB512_MEM,
30839 IX86_BUILTIN_PMOVUSDW512,
30840 IX86_BUILTIN_PMOVUSDW512_MEM,
30841 IX86_BUILTIN_PMOVUSQB512,
30842 IX86_BUILTIN_PMOVUSQB512_MEM,
30843 IX86_BUILTIN_PMOVUSQD512,
30844 IX86_BUILTIN_PMOVUSQD512_MEM,
30845 IX86_BUILTIN_PMOVUSQW512,
30846 IX86_BUILTIN_PMOVUSQW512_MEM,
30847 IX86_BUILTIN_PMOVZXBD512,
30848 IX86_BUILTIN_PMOVZXBQ512,
30849 IX86_BUILTIN_PMOVZXDQ512,
30850 IX86_BUILTIN_PMOVZXWD512,
30851 IX86_BUILTIN_PMOVZXWQ512,
30852 IX86_BUILTIN_PMULDQ512,
30853 IX86_BUILTIN_PMULLD512,
30854 IX86_BUILTIN_PMULUDQ512,
30855 IX86_BUILTIN_PORD512,
30856 IX86_BUILTIN_PORQ512,
30857 IX86_BUILTIN_PROLD512,
30858 IX86_BUILTIN_PROLQ512,
30859 IX86_BUILTIN_PROLVD512,
30860 IX86_BUILTIN_PROLVQ512,
30861 IX86_BUILTIN_PRORD512,
30862 IX86_BUILTIN_PRORQ512,
30863 IX86_BUILTIN_PRORVD512,
30864 IX86_BUILTIN_PRORVQ512,
30865 IX86_BUILTIN_PSHUFD512,
30866 IX86_BUILTIN_PSLLD512,
30867 IX86_BUILTIN_PSLLDI512,
30868 IX86_BUILTIN_PSLLQ512,
30869 IX86_BUILTIN_PSLLQI512,
30870 IX86_BUILTIN_PSLLVV16SI,
30871 IX86_BUILTIN_PSLLVV8DI,
30872 IX86_BUILTIN_PSRAD512,
30873 IX86_BUILTIN_PSRADI512,
30874 IX86_BUILTIN_PSRAQ512,
30875 IX86_BUILTIN_PSRAQI512,
30876 IX86_BUILTIN_PSRAVV16SI,
30877 IX86_BUILTIN_PSRAVV8DI,
30878 IX86_BUILTIN_PSRLD512,
30879 IX86_BUILTIN_PSRLDI512,
30880 IX86_BUILTIN_PSRLQ512,
30881 IX86_BUILTIN_PSRLQI512,
30882 IX86_BUILTIN_PSRLVV16SI,
30883 IX86_BUILTIN_PSRLVV8DI,
30884 IX86_BUILTIN_PSUBD512,
30885 IX86_BUILTIN_PSUBQ512,
30886 IX86_BUILTIN_PTESTMD512,
30887 IX86_BUILTIN_PTESTMQ512,
30888 IX86_BUILTIN_PTESTNMD512,
30889 IX86_BUILTIN_PTESTNMQ512,
30890 IX86_BUILTIN_PUNPCKHDQ512,
30891 IX86_BUILTIN_PUNPCKHQDQ512,
30892 IX86_BUILTIN_PUNPCKLDQ512,
30893 IX86_BUILTIN_PUNPCKLQDQ512,
30894 IX86_BUILTIN_PXORD512,
30895 IX86_BUILTIN_PXORQ512,
30896 IX86_BUILTIN_RCP14PD512,
30897 IX86_BUILTIN_RCP14PS512,
30898 IX86_BUILTIN_RCP14SD,
30899 IX86_BUILTIN_RCP14SS,
30900 IX86_BUILTIN_RNDSCALEPD,
30901 IX86_BUILTIN_RNDSCALEPS,
30902 IX86_BUILTIN_RNDSCALESD,
30903 IX86_BUILTIN_RNDSCALESS,
30904 IX86_BUILTIN_RSQRT14PD512,
30905 IX86_BUILTIN_RSQRT14PS512,
30906 IX86_BUILTIN_RSQRT14SD,
30907 IX86_BUILTIN_RSQRT14SS,
30908 IX86_BUILTIN_SCALEFPD512,
30909 IX86_BUILTIN_SCALEFPS512,
30910 IX86_BUILTIN_SCALEFSD,
30911 IX86_BUILTIN_SCALEFSS,
30912 IX86_BUILTIN_SHUFPD512,
30913 IX86_BUILTIN_SHUFPS512,
30914 IX86_BUILTIN_SHUF_F32x4,
30915 IX86_BUILTIN_SHUF_F64x2,
30916 IX86_BUILTIN_SHUF_I32x4,
30917 IX86_BUILTIN_SHUF_I64x2,
30918 IX86_BUILTIN_SQRTPD512,
30919 IX86_BUILTIN_SQRTPD512_MASK,
30920 IX86_BUILTIN_SQRTPS512_MASK,
30921 IX86_BUILTIN_SQRTPS_NR512,
30922 IX86_BUILTIN_SQRTSD_ROUND,
30923 IX86_BUILTIN_SQRTSS_ROUND,
30924 IX86_BUILTIN_STOREAPD512,
30925 IX86_BUILTIN_STOREAPS512,
30926 IX86_BUILTIN_STOREDQUDI512,
30927 IX86_BUILTIN_STOREDQUSI512,
30928 IX86_BUILTIN_STOREUPD512,
30929 IX86_BUILTIN_STOREUPS512,
30930 IX86_BUILTIN_SUBPD512,
30931 IX86_BUILTIN_SUBPS512,
30932 IX86_BUILTIN_SUBSD_ROUND,
30933 IX86_BUILTIN_SUBSS_ROUND,
30934 IX86_BUILTIN_UCMPD512,
30935 IX86_BUILTIN_UCMPQ512,
30936 IX86_BUILTIN_UNPCKHPD512,
30937 IX86_BUILTIN_UNPCKHPS512,
30938 IX86_BUILTIN_UNPCKLPD512,
30939 IX86_BUILTIN_UNPCKLPS512,
30940 IX86_BUILTIN_VCVTSD2SI32,
30941 IX86_BUILTIN_VCVTSD2SI64,
30942 IX86_BUILTIN_VCVTSD2USI32,
30943 IX86_BUILTIN_VCVTSD2USI64,
30944 IX86_BUILTIN_VCVTSS2SI32,
30945 IX86_BUILTIN_VCVTSS2SI64,
30946 IX86_BUILTIN_VCVTSS2USI32,
30947 IX86_BUILTIN_VCVTSS2USI64,
30948 IX86_BUILTIN_VCVTTSD2SI32,
30949 IX86_BUILTIN_VCVTTSD2SI64,
30950 IX86_BUILTIN_VCVTTSD2USI32,
30951 IX86_BUILTIN_VCVTTSD2USI64,
30952 IX86_BUILTIN_VCVTTSS2SI32,
30953 IX86_BUILTIN_VCVTTSS2SI64,
30954 IX86_BUILTIN_VCVTTSS2USI32,
30955 IX86_BUILTIN_VCVTTSS2USI64,
30956 IX86_BUILTIN_VFMADDPD512_MASK,
30957 IX86_BUILTIN_VFMADDPD512_MASK3,
30958 IX86_BUILTIN_VFMADDPD512_MASKZ,
30959 IX86_BUILTIN_VFMADDPS512_MASK,
30960 IX86_BUILTIN_VFMADDPS512_MASK3,
30961 IX86_BUILTIN_VFMADDPS512_MASKZ,
30962 IX86_BUILTIN_VFMADDSD3_ROUND,
30963 IX86_BUILTIN_VFMADDSS3_ROUND,
30964 IX86_BUILTIN_VFMADDSUBPD512_MASK,
30965 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
30966 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
30967 IX86_BUILTIN_VFMADDSUBPS512_MASK,
30968 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
30969 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
30970 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
30971 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
30972 IX86_BUILTIN_VFMSUBPD512_MASK3,
30973 IX86_BUILTIN_VFMSUBPS512_MASK3,
30974 IX86_BUILTIN_VFMSUBSD3_MASK3,
30975 IX86_BUILTIN_VFMSUBSS3_MASK3,
30976 IX86_BUILTIN_VFNMADDPD512_MASK,
30977 IX86_BUILTIN_VFNMADDPS512_MASK,
30978 IX86_BUILTIN_VFNMSUBPD512_MASK,
30979 IX86_BUILTIN_VFNMSUBPD512_MASK3,
30980 IX86_BUILTIN_VFNMSUBPS512_MASK,
30981 IX86_BUILTIN_VFNMSUBPS512_MASK3,
30982 IX86_BUILTIN_VPCLZCNTD512,
30983 IX86_BUILTIN_VPCLZCNTQ512,
30984 IX86_BUILTIN_VPCONFLICTD512,
30985 IX86_BUILTIN_VPCONFLICTQ512,
30986 IX86_BUILTIN_VPERMDF512,
30987 IX86_BUILTIN_VPERMDI512,
30988 IX86_BUILTIN_VPERMI2VARD512,
30989 IX86_BUILTIN_VPERMI2VARPD512,
30990 IX86_BUILTIN_VPERMI2VARPS512,
30991 IX86_BUILTIN_VPERMI2VARQ512,
30992 IX86_BUILTIN_VPERMILPD512,
30993 IX86_BUILTIN_VPERMILPS512,
30994 IX86_BUILTIN_VPERMILVARPD512,
30995 IX86_BUILTIN_VPERMILVARPS512,
30996 IX86_BUILTIN_VPERMT2VARD512,
30997 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
30998 IX86_BUILTIN_VPERMT2VARPD512,
30999 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
31000 IX86_BUILTIN_VPERMT2VARPS512,
31001 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
31002 IX86_BUILTIN_VPERMT2VARQ512,
31003 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
31004 IX86_BUILTIN_VPERMVARDF512,
31005 IX86_BUILTIN_VPERMVARDI512,
31006 IX86_BUILTIN_VPERMVARSF512,
31007 IX86_BUILTIN_VPERMVARSI512,
31008 IX86_BUILTIN_VTERNLOGD512_MASK,
31009 IX86_BUILTIN_VTERNLOGD512_MASKZ,
31010 IX86_BUILTIN_VTERNLOGQ512_MASK,
31011 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
31013 /* Mask arithmetic operations */
31014 IX86_BUILTIN_KAND16,
31015 IX86_BUILTIN_KANDN16,
31016 IX86_BUILTIN_KNOT16,
31017 IX86_BUILTIN_KOR16,
31018 IX86_BUILTIN_KORTESTC16,
31019 IX86_BUILTIN_KORTESTZ16,
31020 IX86_BUILTIN_KUNPCKBW,
31021 IX86_BUILTIN_KXNOR16,
31022 IX86_BUILTIN_KXOR16,
31023 IX86_BUILTIN_KMOV16,
31026 IX86_BUILTIN_PMOVUSQD256_MEM,
31027 IX86_BUILTIN_PMOVUSQD128_MEM,
31028 IX86_BUILTIN_PMOVSQD256_MEM,
31029 IX86_BUILTIN_PMOVSQD128_MEM,
31030 IX86_BUILTIN_PMOVQD256_MEM,
31031 IX86_BUILTIN_PMOVQD128_MEM,
31032 IX86_BUILTIN_PMOVUSQW256_MEM,
31033 IX86_BUILTIN_PMOVUSQW128_MEM,
31034 IX86_BUILTIN_PMOVSQW256_MEM,
31035 IX86_BUILTIN_PMOVSQW128_MEM,
31036 IX86_BUILTIN_PMOVQW256_MEM,
31037 IX86_BUILTIN_PMOVQW128_MEM,
31038 IX86_BUILTIN_PMOVUSQB256_MEM,
31039 IX86_BUILTIN_PMOVUSQB128_MEM,
31040 IX86_BUILTIN_PMOVSQB256_MEM,
31041 IX86_BUILTIN_PMOVSQB128_MEM,
31042 IX86_BUILTIN_PMOVQB256_MEM,
31043 IX86_BUILTIN_PMOVQB128_MEM,
31044 IX86_BUILTIN_PMOVUSDW256_MEM,
31045 IX86_BUILTIN_PMOVUSDW128_MEM,
31046 IX86_BUILTIN_PMOVSDW256_MEM,
31047 IX86_BUILTIN_PMOVSDW128_MEM,
31048 IX86_BUILTIN_PMOVDW256_MEM,
31049 IX86_BUILTIN_PMOVDW128_MEM,
31050 IX86_BUILTIN_PMOVUSDB256_MEM,
31051 IX86_BUILTIN_PMOVUSDB128_MEM,
31052 IX86_BUILTIN_PMOVSDB256_MEM,
31053 IX86_BUILTIN_PMOVSDB128_MEM,
31054 IX86_BUILTIN_PMOVDB256_MEM,
31055 IX86_BUILTIN_PMOVDB128_MEM,
31056 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
31057 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
31058 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
31059 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
31060 IX86_BUILTIN_MOVDQA64STORE256_MASK,
31061 IX86_BUILTIN_MOVDQA64STORE128_MASK,
31062 IX86_BUILTIN_MOVDQA32STORE256_MASK,
31063 IX86_BUILTIN_MOVDQA32STORE128_MASK,
31064 IX86_BUILTIN_LOADAPD256_MASK,
31065 IX86_BUILTIN_LOADAPD128_MASK,
31066 IX86_BUILTIN_LOADAPS256_MASK,
31067 IX86_BUILTIN_LOADAPS128_MASK,
31068 IX86_BUILTIN_STOREAPD256_MASK,
31069 IX86_BUILTIN_STOREAPD128_MASK,
31070 IX86_BUILTIN_STOREAPS256_MASK,
31071 IX86_BUILTIN_STOREAPS128_MASK,
31072 IX86_BUILTIN_LOADUPD256_MASK,
31073 IX86_BUILTIN_LOADUPD128_MASK,
31074 IX86_BUILTIN_LOADUPS256_MASK,
31075 IX86_BUILTIN_LOADUPS128_MASK,
31076 IX86_BUILTIN_STOREUPD256_MASK,
31077 IX86_BUILTIN_STOREUPD128_MASK,
31078 IX86_BUILTIN_STOREUPS256_MASK,
31079 IX86_BUILTIN_STOREUPS128_MASK,
31080 IX86_BUILTIN_LOADDQUDI256_MASK,
31081 IX86_BUILTIN_LOADDQUDI128_MASK,
31082 IX86_BUILTIN_LOADDQUSI256_MASK,
31083 IX86_BUILTIN_LOADDQUSI128_MASK,
31084 IX86_BUILTIN_LOADDQUHI256_MASK,
31085 IX86_BUILTIN_LOADDQUHI128_MASK,
31086 IX86_BUILTIN_LOADDQUQI256_MASK,
31087 IX86_BUILTIN_LOADDQUQI128_MASK,
31088 IX86_BUILTIN_STOREDQUDI256_MASK,
31089 IX86_BUILTIN_STOREDQUDI128_MASK,
31090 IX86_BUILTIN_STOREDQUSI256_MASK,
31091 IX86_BUILTIN_STOREDQUSI128_MASK,
31092 IX86_BUILTIN_STOREDQUHI256_MASK,
31093 IX86_BUILTIN_STOREDQUHI128_MASK,
31094 IX86_BUILTIN_STOREDQUQI256_MASK,
31095 IX86_BUILTIN_STOREDQUQI128_MASK,
31096 IX86_BUILTIN_COMPRESSPDSTORE256,
31097 IX86_BUILTIN_COMPRESSPDSTORE128,
31098 IX86_BUILTIN_COMPRESSPSSTORE256,
31099 IX86_BUILTIN_COMPRESSPSSTORE128,
31100 IX86_BUILTIN_PCOMPRESSQSTORE256,
31101 IX86_BUILTIN_PCOMPRESSQSTORE128,
31102 IX86_BUILTIN_PCOMPRESSDSTORE256,
31103 IX86_BUILTIN_PCOMPRESSDSTORE128,
31104 IX86_BUILTIN_EXPANDPDLOAD256,
31105 IX86_BUILTIN_EXPANDPDLOAD128,
31106 IX86_BUILTIN_EXPANDPSLOAD256,
31107 IX86_BUILTIN_EXPANDPSLOAD128,
31108 IX86_BUILTIN_PEXPANDQLOAD256,
31109 IX86_BUILTIN_PEXPANDQLOAD128,
31110 IX86_BUILTIN_PEXPANDDLOAD256,
31111 IX86_BUILTIN_PEXPANDDLOAD128,
31112 IX86_BUILTIN_EXPANDPDLOAD256Z,
31113 IX86_BUILTIN_EXPANDPDLOAD128Z,
31114 IX86_BUILTIN_EXPANDPSLOAD256Z,
31115 IX86_BUILTIN_EXPANDPSLOAD128Z,
31116 IX86_BUILTIN_PEXPANDQLOAD256Z,
31117 IX86_BUILTIN_PEXPANDQLOAD128Z,
31118 IX86_BUILTIN_PEXPANDDLOAD256Z,
31119 IX86_BUILTIN_PEXPANDDLOAD128Z,
31120 IX86_BUILTIN_PALIGNR256_MASK,
31121 IX86_BUILTIN_PALIGNR128_MASK,
31122 IX86_BUILTIN_MOVDQA64_256_MASK,
31123 IX86_BUILTIN_MOVDQA64_128_MASK,
31124 IX86_BUILTIN_MOVDQA32_256_MASK,
31125 IX86_BUILTIN_MOVDQA32_128_MASK,
31126 IX86_BUILTIN_MOVAPD256_MASK,
31127 IX86_BUILTIN_MOVAPD128_MASK,
31128 IX86_BUILTIN_MOVAPS256_MASK,
31129 IX86_BUILTIN_MOVAPS128_MASK,
31130 IX86_BUILTIN_MOVDQUHI256_MASK,
31131 IX86_BUILTIN_MOVDQUHI128_MASK,
31132 IX86_BUILTIN_MOVDQUQI256_MASK,
31133 IX86_BUILTIN_MOVDQUQI128_MASK,
31134 IX86_BUILTIN_MINPS128_MASK,
31135 IX86_BUILTIN_MAXPS128_MASK,
31136 IX86_BUILTIN_MINPD128_MASK,
31137 IX86_BUILTIN_MAXPD128_MASK,
31138 IX86_BUILTIN_MAXPD256_MASK,
31139 IX86_BUILTIN_MAXPS256_MASK,
31140 IX86_BUILTIN_MINPD256_MASK,
31141 IX86_BUILTIN_MINPS256_MASK,
31142 IX86_BUILTIN_MULPS128_MASK,
31143 IX86_BUILTIN_DIVPS128_MASK,
31144 IX86_BUILTIN_MULPD128_MASK,
31145 IX86_BUILTIN_DIVPD128_MASK,
31146 IX86_BUILTIN_DIVPD256_MASK,
31147 IX86_BUILTIN_DIVPS256_MASK,
31148 IX86_BUILTIN_MULPD256_MASK,
31149 IX86_BUILTIN_MULPS256_MASK,
31150 IX86_BUILTIN_ADDPD128_MASK,
31151 IX86_BUILTIN_ADDPD256_MASK,
31152 IX86_BUILTIN_ADDPS128_MASK,
31153 IX86_BUILTIN_ADDPS256_MASK,
31154 IX86_BUILTIN_SUBPD128_MASK,
31155 IX86_BUILTIN_SUBPD256_MASK,
31156 IX86_BUILTIN_SUBPS128_MASK,
31157 IX86_BUILTIN_SUBPS256_MASK,
31158 IX86_BUILTIN_XORPD256_MASK,
31159 IX86_BUILTIN_XORPD128_MASK,
31160 IX86_BUILTIN_XORPS256_MASK,
31161 IX86_BUILTIN_XORPS128_MASK,
31162 IX86_BUILTIN_ORPD256_MASK,
31163 IX86_BUILTIN_ORPD128_MASK,
31164 IX86_BUILTIN_ORPS256_MASK,
31165 IX86_BUILTIN_ORPS128_MASK,
31166 IX86_BUILTIN_BROADCASTF32x2_256,
31167 IX86_BUILTIN_BROADCASTI32x2_256,
31168 IX86_BUILTIN_BROADCASTI32x2_128,
31169 IX86_BUILTIN_BROADCASTF64X2_256,
31170 IX86_BUILTIN_BROADCASTI64X2_256,
31171 IX86_BUILTIN_BROADCASTF32X4_256,
31172 IX86_BUILTIN_BROADCASTI32X4_256,
31173 IX86_BUILTIN_EXTRACTF32X4_256,
31174 IX86_BUILTIN_EXTRACTI32X4_256,
31175 IX86_BUILTIN_DBPSADBW256,
31176 IX86_BUILTIN_DBPSADBW128,
31177 IX86_BUILTIN_CVTTPD2QQ256,
31178 IX86_BUILTIN_CVTTPD2QQ128,
31179 IX86_BUILTIN_CVTTPD2UQQ256,
31180 IX86_BUILTIN_CVTTPD2UQQ128,
31181 IX86_BUILTIN_CVTPD2QQ256,
31182 IX86_BUILTIN_CVTPD2QQ128,
31183 IX86_BUILTIN_CVTPD2UQQ256,
31184 IX86_BUILTIN_CVTPD2UQQ128,
31185 IX86_BUILTIN_CVTPD2UDQ256_MASK,
31186 IX86_BUILTIN_CVTPD2UDQ128_MASK,
31187 IX86_BUILTIN_CVTTPS2QQ256,
31188 IX86_BUILTIN_CVTTPS2QQ128,
31189 IX86_BUILTIN_CVTTPS2UQQ256,
31190 IX86_BUILTIN_CVTTPS2UQQ128,
31191 IX86_BUILTIN_CVTTPS2DQ256_MASK,
31192 IX86_BUILTIN_CVTTPS2DQ128_MASK,
31193 IX86_BUILTIN_CVTTPS2UDQ256,
31194 IX86_BUILTIN_CVTTPS2UDQ128,
31195 IX86_BUILTIN_CVTTPD2DQ256_MASK,
31196 IX86_BUILTIN_CVTTPD2DQ128_MASK,
31197 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
31198 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
31199 IX86_BUILTIN_CVTPD2DQ256_MASK,
31200 IX86_BUILTIN_CVTPD2DQ128_MASK,
31201 IX86_BUILTIN_CVTDQ2PD256_MASK,
31202 IX86_BUILTIN_CVTDQ2PD128_MASK,
31203 IX86_BUILTIN_CVTUDQ2PD256_MASK,
31204 IX86_BUILTIN_CVTUDQ2PD128_MASK,
31205 IX86_BUILTIN_CVTDQ2PS256_MASK,
31206 IX86_BUILTIN_CVTDQ2PS128_MASK,
31207 IX86_BUILTIN_CVTUDQ2PS256_MASK,
31208 IX86_BUILTIN_CVTUDQ2PS128_MASK,
31209 IX86_BUILTIN_CVTPS2PD256_MASK,
31210 IX86_BUILTIN_CVTPS2PD128_MASK,
31211 IX86_BUILTIN_PBROADCASTB256_MASK,
31212 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
31213 IX86_BUILTIN_PBROADCASTB128_MASK,
31214 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
31215 IX86_BUILTIN_PBROADCASTW256_MASK,
31216 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
31217 IX86_BUILTIN_PBROADCASTW128_MASK,
31218 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
31219 IX86_BUILTIN_PBROADCASTD256_MASK,
31220 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
31221 IX86_BUILTIN_PBROADCASTD128_MASK,
31222 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
31223 IX86_BUILTIN_PBROADCASTQ256_MASK,
31224 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
31225 IX86_BUILTIN_PBROADCASTQ128_MASK,
31226 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
31227 IX86_BUILTIN_BROADCASTSS256,
31228 IX86_BUILTIN_BROADCASTSS128,
31229 IX86_BUILTIN_BROADCASTSD256,
31230 IX86_BUILTIN_EXTRACTF64X2_256,
31231 IX86_BUILTIN_EXTRACTI64X2_256,
31232 IX86_BUILTIN_INSERTF32X4_256,
31233 IX86_BUILTIN_INSERTI32X4_256,
31234 IX86_BUILTIN_PMOVSXBW256_MASK,
31235 IX86_BUILTIN_PMOVSXBW128_MASK,
31236 IX86_BUILTIN_PMOVSXBD256_MASK,
31237 IX86_BUILTIN_PMOVSXBD128_MASK,
31238 IX86_BUILTIN_PMOVSXBQ256_MASK,
31239 IX86_BUILTIN_PMOVSXBQ128_MASK,
31240 IX86_BUILTIN_PMOVSXWD256_MASK,
31241 IX86_BUILTIN_PMOVSXWD128_MASK,
31242 IX86_BUILTIN_PMOVSXWQ256_MASK,
31243 IX86_BUILTIN_PMOVSXWQ128_MASK,
31244 IX86_BUILTIN_PMOVSXDQ256_MASK,
31245 IX86_BUILTIN_PMOVSXDQ128_MASK,
31246 IX86_BUILTIN_PMOVZXBW256_MASK,
31247 IX86_BUILTIN_PMOVZXBW128_MASK,
31248 IX86_BUILTIN_PMOVZXBD256_MASK,
31249 IX86_BUILTIN_PMOVZXBD128_MASK,
31250 IX86_BUILTIN_PMOVZXBQ256_MASK,
31251 IX86_BUILTIN_PMOVZXBQ128_MASK,
31252 IX86_BUILTIN_PMOVZXWD256_MASK,
31253 IX86_BUILTIN_PMOVZXWD128_MASK,
31254 IX86_BUILTIN_PMOVZXWQ256_MASK,
31255 IX86_BUILTIN_PMOVZXWQ128_MASK,
31256 IX86_BUILTIN_PMOVZXDQ256_MASK,
31257 IX86_BUILTIN_PMOVZXDQ128_MASK,
31258 IX86_BUILTIN_REDUCEPD256_MASK,
31259 IX86_BUILTIN_REDUCEPD128_MASK,
31260 IX86_BUILTIN_REDUCEPS256_MASK,
31261 IX86_BUILTIN_REDUCEPS128_MASK,
31262 IX86_BUILTIN_REDUCESD_MASK,
31263 IX86_BUILTIN_REDUCESS_MASK,
31264 IX86_BUILTIN_VPERMVARHI256_MASK,
31265 IX86_BUILTIN_VPERMVARHI128_MASK,
31266 IX86_BUILTIN_VPERMT2VARHI256,
31267 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
31268 IX86_BUILTIN_VPERMT2VARHI128,
31269 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
31270 IX86_BUILTIN_VPERMI2VARHI256,
31271 IX86_BUILTIN_VPERMI2VARHI128,
31272 IX86_BUILTIN_RCP14PD256,
31273 IX86_BUILTIN_RCP14PD128,
31274 IX86_BUILTIN_RCP14PS256,
31275 IX86_BUILTIN_RCP14PS128,
31276 IX86_BUILTIN_RSQRT14PD256_MASK,
31277 IX86_BUILTIN_RSQRT14PD128_MASK,
31278 IX86_BUILTIN_RSQRT14PS256_MASK,
31279 IX86_BUILTIN_RSQRT14PS128_MASK,
31280 IX86_BUILTIN_SQRTPD256_MASK,
31281 IX86_BUILTIN_SQRTPD128_MASK,
31282 IX86_BUILTIN_SQRTPS256_MASK,
31283 IX86_BUILTIN_SQRTPS128_MASK,
31284 IX86_BUILTIN_PADDB128_MASK,
31285 IX86_BUILTIN_PADDW128_MASK,
31286 IX86_BUILTIN_PADDD128_MASK,
31287 IX86_BUILTIN_PADDQ128_MASK,
31288 IX86_BUILTIN_PSUBB128_MASK,
31289 IX86_BUILTIN_PSUBW128_MASK,
31290 IX86_BUILTIN_PSUBD128_MASK,
31291 IX86_BUILTIN_PSUBQ128_MASK,
31292 IX86_BUILTIN_PADDSB128_MASK,
31293 IX86_BUILTIN_PADDSW128_MASK,
31294 IX86_BUILTIN_PSUBSB128_MASK,
31295 IX86_BUILTIN_PSUBSW128_MASK,
31296 IX86_BUILTIN_PADDUSB128_MASK,
31297 IX86_BUILTIN_PADDUSW128_MASK,
31298 IX86_BUILTIN_PSUBUSB128_MASK,
31299 IX86_BUILTIN_PSUBUSW128_MASK,
31300 IX86_BUILTIN_PADDB256_MASK,
31301 IX86_BUILTIN_PADDW256_MASK,
31302 IX86_BUILTIN_PADDD256_MASK,
31303 IX86_BUILTIN_PADDQ256_MASK,
31304 IX86_BUILTIN_PADDSB256_MASK,
31305 IX86_BUILTIN_PADDSW256_MASK,
31306 IX86_BUILTIN_PADDUSB256_MASK,
31307 IX86_BUILTIN_PADDUSW256_MASK,
31308 IX86_BUILTIN_PSUBB256_MASK,
31309 IX86_BUILTIN_PSUBW256_MASK,
31310 IX86_BUILTIN_PSUBD256_MASK,
31311 IX86_BUILTIN_PSUBQ256_MASK,
31312 IX86_BUILTIN_PSUBSB256_MASK,
31313 IX86_BUILTIN_PSUBSW256_MASK,
31314 IX86_BUILTIN_PSUBUSB256_MASK,
31315 IX86_BUILTIN_PSUBUSW256_MASK,
31316 IX86_BUILTIN_SHUF_F64x2_256,
31317 IX86_BUILTIN_SHUF_I64x2_256,
31318 IX86_BUILTIN_SHUF_I32x4_256,
31319 IX86_BUILTIN_SHUF_F32x4_256,
31320 IX86_BUILTIN_PMOVWB128,
31321 IX86_BUILTIN_PMOVWB256,
31322 IX86_BUILTIN_PMOVSWB128,
31323 IX86_BUILTIN_PMOVSWB256,
31324 IX86_BUILTIN_PMOVUSWB128,
31325 IX86_BUILTIN_PMOVUSWB256,
31326 IX86_BUILTIN_PMOVDB128,
31327 IX86_BUILTIN_PMOVDB256,
31328 IX86_BUILTIN_PMOVSDB128,
31329 IX86_BUILTIN_PMOVSDB256,
31330 IX86_BUILTIN_PMOVUSDB128,
31331 IX86_BUILTIN_PMOVUSDB256,
31332 IX86_BUILTIN_PMOVDW128,
31333 IX86_BUILTIN_PMOVDW256,
31334 IX86_BUILTIN_PMOVSDW128,
31335 IX86_BUILTIN_PMOVSDW256,
31336 IX86_BUILTIN_PMOVUSDW128,
31337 IX86_BUILTIN_PMOVUSDW256,
31338 IX86_BUILTIN_PMOVQB128,
31339 IX86_BUILTIN_PMOVQB256,
31340 IX86_BUILTIN_PMOVSQB128,
31341 IX86_BUILTIN_PMOVSQB256,
31342 IX86_BUILTIN_PMOVUSQB128,
31343 IX86_BUILTIN_PMOVUSQB256,
31344 IX86_BUILTIN_PMOVQW128,
31345 IX86_BUILTIN_PMOVQW256,
31346 IX86_BUILTIN_PMOVSQW128,
31347 IX86_BUILTIN_PMOVSQW256,
31348 IX86_BUILTIN_PMOVUSQW128,
31349 IX86_BUILTIN_PMOVUSQW256,
31350 IX86_BUILTIN_PMOVQD128,
31351 IX86_BUILTIN_PMOVQD256,
31352 IX86_BUILTIN_PMOVSQD128,
31353 IX86_BUILTIN_PMOVSQD256,
31354 IX86_BUILTIN_PMOVUSQD128,
31355 IX86_BUILTIN_PMOVUSQD256,
31356 IX86_BUILTIN_RANGEPD256,
31357 IX86_BUILTIN_RANGEPD128,
31358 IX86_BUILTIN_RANGEPS256,
31359 IX86_BUILTIN_RANGEPS128,
31360 IX86_BUILTIN_GETEXPPS256,
31361 IX86_BUILTIN_GETEXPPD256,
31362 IX86_BUILTIN_GETEXPPS128,
31363 IX86_BUILTIN_GETEXPPD128,
31364 IX86_BUILTIN_FIXUPIMMPD256_MASK,
31365 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
31366 IX86_BUILTIN_FIXUPIMMPS256_MASK,
31367 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
31368 IX86_BUILTIN_FIXUPIMMPD128_MASK,
31369 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
31370 IX86_BUILTIN_FIXUPIMMPS128_MASK,
31371 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
31372 IX86_BUILTIN_PABSQ256,
31373 IX86_BUILTIN_PABSQ128,
31374 IX86_BUILTIN_PABSD256_MASK,
31375 IX86_BUILTIN_PABSD128_MASK,
31376 IX86_BUILTIN_PMULHRSW256_MASK,
31377 IX86_BUILTIN_PMULHRSW128_MASK,
31378 IX86_BUILTIN_PMULHUW128_MASK,
31379 IX86_BUILTIN_PMULHUW256_MASK,
31380 IX86_BUILTIN_PMULHW256_MASK,
31381 IX86_BUILTIN_PMULHW128_MASK,
31382 IX86_BUILTIN_PMULLW256_MASK,
31383 IX86_BUILTIN_PMULLW128_MASK,
31384 IX86_BUILTIN_PMULLQ256,
31385 IX86_BUILTIN_PMULLQ128,
31386 IX86_BUILTIN_ANDPD256_MASK,
31387 IX86_BUILTIN_ANDPD128_MASK,
31388 IX86_BUILTIN_ANDPS256_MASK,
31389 IX86_BUILTIN_ANDPS128_MASK,
31390 IX86_BUILTIN_ANDNPD256_MASK,
31391 IX86_BUILTIN_ANDNPD128_MASK,
31392 IX86_BUILTIN_ANDNPS256_MASK,
31393 IX86_BUILTIN_ANDNPS128_MASK,
31394 IX86_BUILTIN_PSLLWI128_MASK,
31395 IX86_BUILTIN_PSLLDI128_MASK,
31396 IX86_BUILTIN_PSLLQI128_MASK,
31397 IX86_BUILTIN_PSLLW128_MASK,
31398 IX86_BUILTIN_PSLLD128_MASK,
31399 IX86_BUILTIN_PSLLQ128_MASK,
31400 IX86_BUILTIN_PSLLWI256_MASK ,
31401 IX86_BUILTIN_PSLLW256_MASK,
31402 IX86_BUILTIN_PSLLDI256_MASK,
31403 IX86_BUILTIN_PSLLD256_MASK,
31404 IX86_BUILTIN_PSLLQI256_MASK,
31405 IX86_BUILTIN_PSLLQ256_MASK,
31406 IX86_BUILTIN_PSRADI128_MASK,
31407 IX86_BUILTIN_PSRAD128_MASK,
31408 IX86_BUILTIN_PSRADI256_MASK,
31409 IX86_BUILTIN_PSRAD256_MASK,
31410 IX86_BUILTIN_PSRAQI128_MASK,
31411 IX86_BUILTIN_PSRAQ128_MASK,
31412 IX86_BUILTIN_PSRAQI256_MASK,
31413 IX86_BUILTIN_PSRAQ256_MASK,
31414 IX86_BUILTIN_PANDD256,
31415 IX86_BUILTIN_PANDD128,
31416 IX86_BUILTIN_PSRLDI128_MASK,
31417 IX86_BUILTIN_PSRLD128_MASK,
31418 IX86_BUILTIN_PSRLDI256_MASK,
31419 IX86_BUILTIN_PSRLD256_MASK,
31420 IX86_BUILTIN_PSRLQI128_MASK,
31421 IX86_BUILTIN_PSRLQ128_MASK,
31422 IX86_BUILTIN_PSRLQI256_MASK,
31423 IX86_BUILTIN_PSRLQ256_MASK,
31424 IX86_BUILTIN_PANDQ256,
31425 IX86_BUILTIN_PANDQ128,
31426 IX86_BUILTIN_PANDND256,
31427 IX86_BUILTIN_PANDND128,
31428 IX86_BUILTIN_PANDNQ256,
31429 IX86_BUILTIN_PANDNQ128,
31430 IX86_BUILTIN_PORD256,
31431 IX86_BUILTIN_PORD128,
31432 IX86_BUILTIN_PORQ256,
31433 IX86_BUILTIN_PORQ128,
31434 IX86_BUILTIN_PXORD256,
31435 IX86_BUILTIN_PXORD128,
31436 IX86_BUILTIN_PXORQ256,
31437 IX86_BUILTIN_PXORQ128,
31438 IX86_BUILTIN_PACKSSWB256_MASK,
31439 IX86_BUILTIN_PACKSSWB128_MASK,
31440 IX86_BUILTIN_PACKUSWB256_MASK,
31441 IX86_BUILTIN_PACKUSWB128_MASK,
31442 IX86_BUILTIN_RNDSCALEPS256,
31443 IX86_BUILTIN_RNDSCALEPD256,
31444 IX86_BUILTIN_RNDSCALEPS128,
31445 IX86_BUILTIN_RNDSCALEPD128,
31446 IX86_BUILTIN_VTERNLOGQ256_MASK,
31447 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
31448 IX86_BUILTIN_VTERNLOGD256_MASK,
31449 IX86_BUILTIN_VTERNLOGD256_MASKZ,
31450 IX86_BUILTIN_VTERNLOGQ128_MASK,
31451 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
31452 IX86_BUILTIN_VTERNLOGD128_MASK,
31453 IX86_BUILTIN_VTERNLOGD128_MASKZ,
31454 IX86_BUILTIN_SCALEFPD256,
31455 IX86_BUILTIN_SCALEFPS256,
31456 IX86_BUILTIN_SCALEFPD128,
31457 IX86_BUILTIN_SCALEFPS128,
31458 IX86_BUILTIN_VFMADDPD256_MASK,
31459 IX86_BUILTIN_VFMADDPD256_MASK3,
31460 IX86_BUILTIN_VFMADDPD256_MASKZ,
31461 IX86_BUILTIN_VFMADDPD128_MASK,
31462 IX86_BUILTIN_VFMADDPD128_MASK3,
31463 IX86_BUILTIN_VFMADDPD128_MASKZ,
31464 IX86_BUILTIN_VFMADDPS256_MASK,
31465 IX86_BUILTIN_VFMADDPS256_MASK3,
31466 IX86_BUILTIN_VFMADDPS256_MASKZ,
31467 IX86_BUILTIN_VFMADDPS128_MASK,
31468 IX86_BUILTIN_VFMADDPS128_MASK3,
31469 IX86_BUILTIN_VFMADDPS128_MASKZ,
31470 IX86_BUILTIN_VFMSUBPD256_MASK3,
31471 IX86_BUILTIN_VFMSUBPD128_MASK3,
31472 IX86_BUILTIN_VFMSUBPS256_MASK3,
31473 IX86_BUILTIN_VFMSUBPS128_MASK3,
31474 IX86_BUILTIN_VFNMADDPD256_MASK,
31475 IX86_BUILTIN_VFNMADDPD128_MASK,
31476 IX86_BUILTIN_VFNMADDPS256_MASK,
31477 IX86_BUILTIN_VFNMADDPS128_MASK,
31478 IX86_BUILTIN_VFNMSUBPD256_MASK,
31479 IX86_BUILTIN_VFNMSUBPD256_MASK3,
31480 IX86_BUILTIN_VFNMSUBPD128_MASK,
31481 IX86_BUILTIN_VFNMSUBPD128_MASK3,
31482 IX86_BUILTIN_VFNMSUBPS256_MASK,
31483 IX86_BUILTIN_VFNMSUBPS256_MASK3,
31484 IX86_BUILTIN_VFNMSUBPS128_MASK,
31485 IX86_BUILTIN_VFNMSUBPS128_MASK3,
31486 IX86_BUILTIN_VFMADDSUBPD256_MASK,
31487 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
31488 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
31489 IX86_BUILTIN_VFMADDSUBPD128_MASK,
31490 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
31491 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
31492 IX86_BUILTIN_VFMADDSUBPS256_MASK,
31493 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
31494 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
31495 IX86_BUILTIN_VFMADDSUBPS128_MASK,
31496 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
31497 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
31498 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
31499 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
31500 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
31501 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
31502 IX86_BUILTIN_INSERTF64X2_256,
31503 IX86_BUILTIN_INSERTI64X2_256,
31504 IX86_BUILTIN_PSRAVV16HI,
31505 IX86_BUILTIN_PSRAVV8HI,
31506 IX86_BUILTIN_PMADDUBSW256_MASK,
31507 IX86_BUILTIN_PMADDUBSW128_MASK,
31508 IX86_BUILTIN_PMADDWD256_MASK,
31509 IX86_BUILTIN_PMADDWD128_MASK,
31510 IX86_BUILTIN_PSRLVV16HI,
31511 IX86_BUILTIN_PSRLVV8HI,
31512 IX86_BUILTIN_CVTPS2DQ256_MASK,
31513 IX86_BUILTIN_CVTPS2DQ128_MASK,
31514 IX86_BUILTIN_CVTPS2UDQ256,
31515 IX86_BUILTIN_CVTPS2UDQ128,
31516 IX86_BUILTIN_CVTPS2QQ256,
31517 IX86_BUILTIN_CVTPS2QQ128,
31518 IX86_BUILTIN_CVTPS2UQQ256,
31519 IX86_BUILTIN_CVTPS2UQQ128,
31520 IX86_BUILTIN_GETMANTPS256,
31521 IX86_BUILTIN_GETMANTPS128,
31522 IX86_BUILTIN_GETMANTPD256,
31523 IX86_BUILTIN_GETMANTPD128,
31524 IX86_BUILTIN_MOVDDUP256_MASK,
31525 IX86_BUILTIN_MOVDDUP128_MASK,
31526 IX86_BUILTIN_MOVSHDUP256_MASK,
31527 IX86_BUILTIN_MOVSHDUP128_MASK,
31528 IX86_BUILTIN_MOVSLDUP256_MASK,
31529 IX86_BUILTIN_MOVSLDUP128_MASK,
31530 IX86_BUILTIN_CVTQQ2PS256,
31531 IX86_BUILTIN_CVTQQ2PS128,
31532 IX86_BUILTIN_CVTUQQ2PS256,
31533 IX86_BUILTIN_CVTUQQ2PS128,
31534 IX86_BUILTIN_CVTQQ2PD256,
31535 IX86_BUILTIN_CVTQQ2PD128,
31536 IX86_BUILTIN_CVTUQQ2PD256,
31537 IX86_BUILTIN_CVTUQQ2PD128,
31538 IX86_BUILTIN_VPERMT2VARQ256,
31539 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
31540 IX86_BUILTIN_VPERMT2VARD256,
31541 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
31542 IX86_BUILTIN_VPERMI2VARQ256,
31543 IX86_BUILTIN_VPERMI2VARD256,
31544 IX86_BUILTIN_VPERMT2VARPD256,
31545 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
31546 IX86_BUILTIN_VPERMT2VARPS256,
31547 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
31548 IX86_BUILTIN_VPERMI2VARPD256,
31549 IX86_BUILTIN_VPERMI2VARPS256,
31550 IX86_BUILTIN_VPERMT2VARQ128,
31551 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
31552 IX86_BUILTIN_VPERMT2VARD128,
31553 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
31554 IX86_BUILTIN_VPERMI2VARQ128,
31555 IX86_BUILTIN_VPERMI2VARD128,
31556 IX86_BUILTIN_VPERMT2VARPD128,
31557 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
31558 IX86_BUILTIN_VPERMT2VARPS128,
31559 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
31560 IX86_BUILTIN_VPERMI2VARPD128,
31561 IX86_BUILTIN_VPERMI2VARPS128,
31562 IX86_BUILTIN_PSHUFB256_MASK,
31563 IX86_BUILTIN_PSHUFB128_MASK,
31564 IX86_BUILTIN_PSHUFHW256_MASK,
31565 IX86_BUILTIN_PSHUFHW128_MASK,
31566 IX86_BUILTIN_PSHUFLW256_MASK,
31567 IX86_BUILTIN_PSHUFLW128_MASK,
31568 IX86_BUILTIN_PSHUFD256_MASK,
31569 IX86_BUILTIN_PSHUFD128_MASK,
31570 IX86_BUILTIN_SHUFPD256_MASK,
31571 IX86_BUILTIN_SHUFPD128_MASK,
31572 IX86_BUILTIN_SHUFPS256_MASK,
31573 IX86_BUILTIN_SHUFPS128_MASK,
31574 IX86_BUILTIN_PROLVQ256,
31575 IX86_BUILTIN_PROLVQ128,
31576 IX86_BUILTIN_PROLQ256,
31577 IX86_BUILTIN_PROLQ128,
31578 IX86_BUILTIN_PRORVQ256,
31579 IX86_BUILTIN_PRORVQ128,
31580 IX86_BUILTIN_PRORQ256,
31581 IX86_BUILTIN_PRORQ128,
31582 IX86_BUILTIN_PSRAVQ128,
31583 IX86_BUILTIN_PSRAVQ256,
31584 IX86_BUILTIN_PSLLVV4DI_MASK,
31585 IX86_BUILTIN_PSLLVV2DI_MASK,
31586 IX86_BUILTIN_PSLLVV8SI_MASK,
31587 IX86_BUILTIN_PSLLVV4SI_MASK,
31588 IX86_BUILTIN_PSRAVV8SI_MASK,
31589 IX86_BUILTIN_PSRAVV4SI_MASK,
31590 IX86_BUILTIN_PSRLVV4DI_MASK,
31591 IX86_BUILTIN_PSRLVV2DI_MASK,
31592 IX86_BUILTIN_PSRLVV8SI_MASK,
31593 IX86_BUILTIN_PSRLVV4SI_MASK,
31594 IX86_BUILTIN_PSRAWI256_MASK,
31595 IX86_BUILTIN_PSRAW256_MASK,
31596 IX86_BUILTIN_PSRAWI128_MASK,
31597 IX86_BUILTIN_PSRAW128_MASK,
31598 IX86_BUILTIN_PSRLWI256_MASK,
31599 IX86_BUILTIN_PSRLW256_MASK,
31600 IX86_BUILTIN_PSRLWI128_MASK,
31601 IX86_BUILTIN_PSRLW128_MASK,
31602 IX86_BUILTIN_PRORVD256,
31603 IX86_BUILTIN_PROLVD256,
31604 IX86_BUILTIN_PRORD256,
31605 IX86_BUILTIN_PROLD256,
31606 IX86_BUILTIN_PRORVD128,
31607 IX86_BUILTIN_PROLVD128,
31608 IX86_BUILTIN_PRORD128,
31609 IX86_BUILTIN_PROLD128,
31610 IX86_BUILTIN_FPCLASSPD256,
31611 IX86_BUILTIN_FPCLASSPD128,
31612 IX86_BUILTIN_FPCLASSSD,
31613 IX86_BUILTIN_FPCLASSPS256,
31614 IX86_BUILTIN_FPCLASSPS128,
31615 IX86_BUILTIN_FPCLASSSS,
31616 IX86_BUILTIN_CVTB2MASK128,
31617 IX86_BUILTIN_CVTB2MASK256,
31618 IX86_BUILTIN_CVTW2MASK128,
31619 IX86_BUILTIN_CVTW2MASK256,
31620 IX86_BUILTIN_CVTD2MASK128,
31621 IX86_BUILTIN_CVTD2MASK256,
31622 IX86_BUILTIN_CVTQ2MASK128,
31623 IX86_BUILTIN_CVTQ2MASK256,
31624 IX86_BUILTIN_CVTMASK2B128,
31625 IX86_BUILTIN_CVTMASK2B256,
31626 IX86_BUILTIN_CVTMASK2W128,
31627 IX86_BUILTIN_CVTMASK2W256,
31628 IX86_BUILTIN_CVTMASK2D128,
31629 IX86_BUILTIN_CVTMASK2D256,
31630 IX86_BUILTIN_CVTMASK2Q128,
31631 IX86_BUILTIN_CVTMASK2Q256,
31632 IX86_BUILTIN_PCMPEQB128_MASK,
31633 IX86_BUILTIN_PCMPEQB256_MASK,
31634 IX86_BUILTIN_PCMPEQW128_MASK,
31635 IX86_BUILTIN_PCMPEQW256_MASK,
31636 IX86_BUILTIN_PCMPEQD128_MASK,
31637 IX86_BUILTIN_PCMPEQD256_MASK,
31638 IX86_BUILTIN_PCMPEQQ128_MASK,
31639 IX86_BUILTIN_PCMPEQQ256_MASK,
31640 IX86_BUILTIN_PCMPGTB128_MASK,
31641 IX86_BUILTIN_PCMPGTB256_MASK,
31642 IX86_BUILTIN_PCMPGTW128_MASK,
31643 IX86_BUILTIN_PCMPGTW256_MASK,
31644 IX86_BUILTIN_PCMPGTD128_MASK,
31645 IX86_BUILTIN_PCMPGTD256_MASK,
31646 IX86_BUILTIN_PCMPGTQ128_MASK,
31647 IX86_BUILTIN_PCMPGTQ256_MASK,
31648 IX86_BUILTIN_PTESTMB128,
31649 IX86_BUILTIN_PTESTMB256,
31650 IX86_BUILTIN_PTESTMW128,
31651 IX86_BUILTIN_PTESTMW256,
31652 IX86_BUILTIN_PTESTMD128,
31653 IX86_BUILTIN_PTESTMD256,
31654 IX86_BUILTIN_PTESTMQ128,
31655 IX86_BUILTIN_PTESTMQ256,
31656 IX86_BUILTIN_PTESTNMB128,
31657 IX86_BUILTIN_PTESTNMB256,
31658 IX86_BUILTIN_PTESTNMW128,
31659 IX86_BUILTIN_PTESTNMW256,
31660 IX86_BUILTIN_PTESTNMD128,
31661 IX86_BUILTIN_PTESTNMD256,
31662 IX86_BUILTIN_PTESTNMQ128,
31663 IX86_BUILTIN_PTESTNMQ256,
31664 IX86_BUILTIN_PBROADCASTMB128,
31665 IX86_BUILTIN_PBROADCASTMB256,
31666 IX86_BUILTIN_PBROADCASTMW128,
31667 IX86_BUILTIN_PBROADCASTMW256,
31668 IX86_BUILTIN_COMPRESSPD256,
31669 IX86_BUILTIN_COMPRESSPD128,
31670 IX86_BUILTIN_COMPRESSPS256,
31671 IX86_BUILTIN_COMPRESSPS128,
31672 IX86_BUILTIN_PCOMPRESSQ256,
31673 IX86_BUILTIN_PCOMPRESSQ128,
31674 IX86_BUILTIN_PCOMPRESSD256,
31675 IX86_BUILTIN_PCOMPRESSD128,
31676 IX86_BUILTIN_EXPANDPD256,
31677 IX86_BUILTIN_EXPANDPD128,
31678 IX86_BUILTIN_EXPANDPS256,
31679 IX86_BUILTIN_EXPANDPS128,
31680 IX86_BUILTIN_PEXPANDQ256,
31681 IX86_BUILTIN_PEXPANDQ128,
31682 IX86_BUILTIN_PEXPANDD256,
31683 IX86_BUILTIN_PEXPANDD128,
31684 IX86_BUILTIN_EXPANDPD256Z,
31685 IX86_BUILTIN_EXPANDPD128Z,
31686 IX86_BUILTIN_EXPANDPS256Z,
31687 IX86_BUILTIN_EXPANDPS128Z,
31688 IX86_BUILTIN_PEXPANDQ256Z,
31689 IX86_BUILTIN_PEXPANDQ128Z,
31690 IX86_BUILTIN_PEXPANDD256Z,
31691 IX86_BUILTIN_PEXPANDD128Z,
31692 IX86_BUILTIN_PMAXSD256_MASK,
31693 IX86_BUILTIN_PMINSD256_MASK,
31694 IX86_BUILTIN_PMAXUD256_MASK,
31695 IX86_BUILTIN_PMINUD256_MASK,
31696 IX86_BUILTIN_PMAXSD128_MASK,
31697 IX86_BUILTIN_PMINSD128_MASK,
31698 IX86_BUILTIN_PMAXUD128_MASK,
31699 IX86_BUILTIN_PMINUD128_MASK,
31700 IX86_BUILTIN_PMAXSQ256_MASK,
31701 IX86_BUILTIN_PMINSQ256_MASK,
31702 IX86_BUILTIN_PMAXUQ256_MASK,
31703 IX86_BUILTIN_PMINUQ256_MASK,
31704 IX86_BUILTIN_PMAXSQ128_MASK,
31705 IX86_BUILTIN_PMINSQ128_MASK,
31706 IX86_BUILTIN_PMAXUQ128_MASK,
31707 IX86_BUILTIN_PMINUQ128_MASK,
31708 IX86_BUILTIN_PMINSB256_MASK,
31709 IX86_BUILTIN_PMINUB256_MASK,
31710 IX86_BUILTIN_PMAXSB256_MASK,
31711 IX86_BUILTIN_PMAXUB256_MASK,
31712 IX86_BUILTIN_PMINSB128_MASK,
31713 IX86_BUILTIN_PMINUB128_MASK,
31714 IX86_BUILTIN_PMAXSB128_MASK,
31715 IX86_BUILTIN_PMAXUB128_MASK,
31716 IX86_BUILTIN_PMINSW256_MASK,
31717 IX86_BUILTIN_PMINUW256_MASK,
31718 IX86_BUILTIN_PMAXSW256_MASK,
31719 IX86_BUILTIN_PMAXUW256_MASK,
31720 IX86_BUILTIN_PMINSW128_MASK,
31721 IX86_BUILTIN_PMINUW128_MASK,
31722 IX86_BUILTIN_PMAXSW128_MASK,
31723 IX86_BUILTIN_PMAXUW128_MASK,
31724 IX86_BUILTIN_VPCONFLICTQ256,
31725 IX86_BUILTIN_VPCONFLICTD256,
31726 IX86_BUILTIN_VPCLZCNTQ256,
31727 IX86_BUILTIN_VPCLZCNTD256,
31728 IX86_BUILTIN_UNPCKHPD256_MASK,
31729 IX86_BUILTIN_UNPCKHPD128_MASK,
31730 IX86_BUILTIN_UNPCKHPS256_MASK,
31731 IX86_BUILTIN_UNPCKHPS128_MASK,
31732 IX86_BUILTIN_UNPCKLPD256_MASK,
31733 IX86_BUILTIN_UNPCKLPD128_MASK,
31734 IX86_BUILTIN_UNPCKLPS256_MASK,
31735 IX86_BUILTIN_VPCONFLICTQ128,
31736 IX86_BUILTIN_VPCONFLICTD128,
31737 IX86_BUILTIN_VPCLZCNTQ128,
31738 IX86_BUILTIN_VPCLZCNTD128,
31739 IX86_BUILTIN_UNPCKLPS128_MASK,
31740 IX86_BUILTIN_ALIGND256,
31741 IX86_BUILTIN_ALIGNQ256,
31742 IX86_BUILTIN_ALIGND128,
31743 IX86_BUILTIN_ALIGNQ128,
31744 IX86_BUILTIN_CVTPS2PH256_MASK,
31745 IX86_BUILTIN_CVTPS2PH_MASK,
31746 IX86_BUILTIN_CVTPH2PS_MASK,
31747 IX86_BUILTIN_CVTPH2PS256_MASK,
31748 IX86_BUILTIN_PUNPCKHDQ128_MASK,
31749 IX86_BUILTIN_PUNPCKHDQ256_MASK,
31750 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
31751 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
31752 IX86_BUILTIN_PUNPCKLDQ128_MASK,
31753 IX86_BUILTIN_PUNPCKLDQ256_MASK,
31754 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
31755 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
31756 IX86_BUILTIN_PUNPCKHBW128_MASK,
31757 IX86_BUILTIN_PUNPCKHBW256_MASK,
31758 IX86_BUILTIN_PUNPCKHWD128_MASK,
31759 IX86_BUILTIN_PUNPCKHWD256_MASK,
31760 IX86_BUILTIN_PUNPCKLBW128_MASK,
31761 IX86_BUILTIN_PUNPCKLBW256_MASK,
31762 IX86_BUILTIN_PUNPCKLWD128_MASK,
31763 IX86_BUILTIN_PUNPCKLWD256_MASK,
31764 IX86_BUILTIN_PSLLVV16HI,
31765 IX86_BUILTIN_PSLLVV8HI,
31766 IX86_BUILTIN_PACKSSDW256_MASK,
31767 IX86_BUILTIN_PACKSSDW128_MASK,
31768 IX86_BUILTIN_PACKUSDW256_MASK,
31769 IX86_BUILTIN_PACKUSDW128_MASK,
31770 IX86_BUILTIN_PAVGB256_MASK,
31771 IX86_BUILTIN_PAVGW256_MASK,
31772 IX86_BUILTIN_PAVGB128_MASK,
31773 IX86_BUILTIN_PAVGW128_MASK,
31774 IX86_BUILTIN_VPERMVARSF256_MASK,
31775 IX86_BUILTIN_VPERMVARDF256_MASK,
31776 IX86_BUILTIN_VPERMDF256_MASK,
31777 IX86_BUILTIN_PABSB256_MASK,
31778 IX86_BUILTIN_PABSB128_MASK,
31779 IX86_BUILTIN_PABSW256_MASK,
31780 IX86_BUILTIN_PABSW128_MASK,
31781 IX86_BUILTIN_VPERMILVARPD_MASK,
31782 IX86_BUILTIN_VPERMILVARPS_MASK,
31783 IX86_BUILTIN_VPERMILVARPD256_MASK,
31784 IX86_BUILTIN_VPERMILVARPS256_MASK,
31785 IX86_BUILTIN_VPERMILPD_MASK,
31786 IX86_BUILTIN_VPERMILPS_MASK,
31787 IX86_BUILTIN_VPERMILPD256_MASK,
31788 IX86_BUILTIN_VPERMILPS256_MASK,
31789 IX86_BUILTIN_BLENDMQ256,
31790 IX86_BUILTIN_BLENDMD256,
31791 IX86_BUILTIN_BLENDMPD256,
31792 IX86_BUILTIN_BLENDMPS256,
31793 IX86_BUILTIN_BLENDMQ128,
31794 IX86_BUILTIN_BLENDMD128,
31795 IX86_BUILTIN_BLENDMPD128,
31796 IX86_BUILTIN_BLENDMPS128,
31797 IX86_BUILTIN_BLENDMW256,
31798 IX86_BUILTIN_BLENDMB256,
31799 IX86_BUILTIN_BLENDMW128,
31800 IX86_BUILTIN_BLENDMB128,
31801 IX86_BUILTIN_PMULLD256_MASK,
31802 IX86_BUILTIN_PMULLD128_MASK,
31803 IX86_BUILTIN_PMULUDQ256_MASK,
31804 IX86_BUILTIN_PMULDQ256_MASK,
31805 IX86_BUILTIN_PMULDQ128_MASK,
31806 IX86_BUILTIN_PMULUDQ128_MASK,
31807 IX86_BUILTIN_CVTPD2PS256_MASK,
31808 IX86_BUILTIN_CVTPD2PS_MASK,
31809 IX86_BUILTIN_VPERMVARSI256_MASK,
31810 IX86_BUILTIN_VPERMVARDI256_MASK,
31811 IX86_BUILTIN_VPERMDI256_MASK,
31812 IX86_BUILTIN_CMPQ256,
31813 IX86_BUILTIN_CMPD256,
31814 IX86_BUILTIN_UCMPQ256,
31815 IX86_BUILTIN_UCMPD256,
31816 IX86_BUILTIN_CMPB256,
31817 IX86_BUILTIN_CMPW256,
31818 IX86_BUILTIN_UCMPB256,
31819 IX86_BUILTIN_UCMPW256,
31820 IX86_BUILTIN_CMPPD256_MASK,
31821 IX86_BUILTIN_CMPPS256_MASK,
31822 IX86_BUILTIN_CMPQ128,
31823 IX86_BUILTIN_CMPD128,
31824 IX86_BUILTIN_UCMPQ128,
31825 IX86_BUILTIN_UCMPD128,
31826 IX86_BUILTIN_CMPB128,
31827 IX86_BUILTIN_CMPW128,
31828 IX86_BUILTIN_UCMPB128,
31829 IX86_BUILTIN_UCMPW128,
31830 IX86_BUILTIN_CMPPD128_MASK,
31831 IX86_BUILTIN_CMPPS128_MASK,
31833 IX86_BUILTIN_GATHER3SIV8SF,
31834 IX86_BUILTIN_GATHER3SIV4SF,
31835 IX86_BUILTIN_GATHER3SIV4DF,
31836 IX86_BUILTIN_GATHER3SIV2DF,
31837 IX86_BUILTIN_GATHER3DIV8SF,
31838 IX86_BUILTIN_GATHER3DIV4SF,
31839 IX86_BUILTIN_GATHER3DIV4DF,
31840 IX86_BUILTIN_GATHER3DIV2DF,
31841 IX86_BUILTIN_GATHER3SIV8SI,
31842 IX86_BUILTIN_GATHER3SIV4SI,
31843 IX86_BUILTIN_GATHER3SIV4DI,
31844 IX86_BUILTIN_GATHER3SIV2DI,
31845 IX86_BUILTIN_GATHER3DIV8SI,
31846 IX86_BUILTIN_GATHER3DIV4SI,
31847 IX86_BUILTIN_GATHER3DIV4DI,
31848 IX86_BUILTIN_GATHER3DIV2DI,
31849 IX86_BUILTIN_SCATTERSIV8SF,
31850 IX86_BUILTIN_SCATTERSIV4SF,
31851 IX86_BUILTIN_SCATTERSIV4DF,
31852 IX86_BUILTIN_SCATTERSIV2DF,
31853 IX86_BUILTIN_SCATTERDIV8SF,
31854 IX86_BUILTIN_SCATTERDIV4SF,
31855 IX86_BUILTIN_SCATTERDIV4DF,
31856 IX86_BUILTIN_SCATTERDIV2DF,
31857 IX86_BUILTIN_SCATTERSIV8SI,
31858 IX86_BUILTIN_SCATTERSIV4SI,
31859 IX86_BUILTIN_SCATTERSIV4DI,
31860 IX86_BUILTIN_SCATTERSIV2DI,
31861 IX86_BUILTIN_SCATTERDIV8SI,
31862 IX86_BUILTIN_SCATTERDIV4SI,
31863 IX86_BUILTIN_SCATTERDIV4DI,
31864 IX86_BUILTIN_SCATTERDIV2DI,
31867 IX86_BUILTIN_RANGESD128,
31868 IX86_BUILTIN_RANGESS128,
31869 IX86_BUILTIN_KUNPCKWD,
31870 IX86_BUILTIN_KUNPCKDQ,
31871 IX86_BUILTIN_BROADCASTF32x2_512,
31872 IX86_BUILTIN_BROADCASTI32x2_512,
31873 IX86_BUILTIN_BROADCASTF64X2_512,
31874 IX86_BUILTIN_BROADCASTI64X2_512,
31875 IX86_BUILTIN_BROADCASTF32X8_512,
31876 IX86_BUILTIN_BROADCASTI32X8_512,
31877 IX86_BUILTIN_EXTRACTF64X2_512,
31878 IX86_BUILTIN_EXTRACTF32X8,
31879 IX86_BUILTIN_EXTRACTI64X2_512,
31880 IX86_BUILTIN_EXTRACTI32X8,
31881 IX86_BUILTIN_REDUCEPD512_MASK,
31882 IX86_BUILTIN_REDUCEPS512_MASK,
31883 IX86_BUILTIN_PMULLQ512,
31884 IX86_BUILTIN_XORPD512,
31885 IX86_BUILTIN_XORPS512,
31886 IX86_BUILTIN_ORPD512,
31887 IX86_BUILTIN_ORPS512,
31888 IX86_BUILTIN_ANDPD512,
31889 IX86_BUILTIN_ANDPS512,
31890 IX86_BUILTIN_ANDNPD512,
31891 IX86_BUILTIN_ANDNPS512,
31892 IX86_BUILTIN_INSERTF32X8,
31893 IX86_BUILTIN_INSERTI32X8,
31894 IX86_BUILTIN_INSERTF64X2_512,
31895 IX86_BUILTIN_INSERTI64X2_512,
31896 IX86_BUILTIN_FPCLASSPD512,
31897 IX86_BUILTIN_FPCLASSPS512,
31898 IX86_BUILTIN_CVTD2MASK512,
31899 IX86_BUILTIN_CVTQ2MASK512,
31900 IX86_BUILTIN_CVTMASK2D512,
31901 IX86_BUILTIN_CVTMASK2Q512,
31902 IX86_BUILTIN_CVTPD2QQ512,
31903 IX86_BUILTIN_CVTPS2QQ512,
31904 IX86_BUILTIN_CVTPD2UQQ512,
31905 IX86_BUILTIN_CVTPS2UQQ512,
31906 IX86_BUILTIN_CVTQQ2PS512,
31907 IX86_BUILTIN_CVTUQQ2PS512,
31908 IX86_BUILTIN_CVTQQ2PD512,
31909 IX86_BUILTIN_CVTUQQ2PD512,
31910 IX86_BUILTIN_CVTTPS2QQ512,
31911 IX86_BUILTIN_CVTTPS2UQQ512,
31912 IX86_BUILTIN_CVTTPD2QQ512,
31913 IX86_BUILTIN_CVTTPD2UQQ512,
31914 IX86_BUILTIN_RANGEPS512,
31915 IX86_BUILTIN_RANGEPD512,
31918 IX86_BUILTIN_PACKUSDW512,
31919 IX86_BUILTIN_PACKSSDW512,
31920 IX86_BUILTIN_LOADDQUHI512_MASK,
31921 IX86_BUILTIN_LOADDQUQI512_MASK,
31922 IX86_BUILTIN_PSLLDQ512,
31923 IX86_BUILTIN_PSRLDQ512,
31924 IX86_BUILTIN_STOREDQUHI512_MASK,
31925 IX86_BUILTIN_STOREDQUQI512_MASK,
31926 IX86_BUILTIN_PALIGNR512,
31927 IX86_BUILTIN_PALIGNR512_MASK,
31928 IX86_BUILTIN_MOVDQUHI512_MASK,
31929 IX86_BUILTIN_MOVDQUQI512_MASK,
31930 IX86_BUILTIN_PSADBW512,
31931 IX86_BUILTIN_DBPSADBW512,
31932 IX86_BUILTIN_PBROADCASTB512,
31933 IX86_BUILTIN_PBROADCASTB512_GPR,
31934 IX86_BUILTIN_PBROADCASTW512,
31935 IX86_BUILTIN_PBROADCASTW512_GPR,
31936 IX86_BUILTIN_PMOVSXBW512_MASK,
31937 IX86_BUILTIN_PMOVZXBW512_MASK,
31938 IX86_BUILTIN_VPERMVARHI512_MASK,
31939 IX86_BUILTIN_VPERMT2VARHI512,
31940 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
31941 IX86_BUILTIN_VPERMI2VARHI512,
31942 IX86_BUILTIN_PAVGB512,
31943 IX86_BUILTIN_PAVGW512,
31944 IX86_BUILTIN_PADDB512,
31945 IX86_BUILTIN_PSUBB512,
31946 IX86_BUILTIN_PSUBSB512,
31947 IX86_BUILTIN_PADDSB512,
31948 IX86_BUILTIN_PSUBUSB512,
31949 IX86_BUILTIN_PADDUSB512,
31950 IX86_BUILTIN_PSUBW512,
31951 IX86_BUILTIN_PADDW512,
31952 IX86_BUILTIN_PSUBSW512,
31953 IX86_BUILTIN_PADDSW512,
31954 IX86_BUILTIN_PSUBUSW512,
31955 IX86_BUILTIN_PADDUSW512,
31956 IX86_BUILTIN_PMAXUW512,
31957 IX86_BUILTIN_PMAXSW512,
31958 IX86_BUILTIN_PMINUW512,
31959 IX86_BUILTIN_PMINSW512,
31960 IX86_BUILTIN_PMAXUB512,
31961 IX86_BUILTIN_PMAXSB512,
31962 IX86_BUILTIN_PMINUB512,
31963 IX86_BUILTIN_PMINSB512,
31964 IX86_BUILTIN_PMOVWB512,
31965 IX86_BUILTIN_PMOVSWB512,
31966 IX86_BUILTIN_PMOVUSWB512,
31967 IX86_BUILTIN_PMULHRSW512_MASK,
31968 IX86_BUILTIN_PMULHUW512_MASK,
31969 IX86_BUILTIN_PMULHW512_MASK,
31970 IX86_BUILTIN_PMULLW512_MASK,
31971 IX86_BUILTIN_PSLLWI512_MASK,
31972 IX86_BUILTIN_PSLLW512_MASK,
31973 IX86_BUILTIN_PACKSSWB512,
31974 IX86_BUILTIN_PACKUSWB512,
31975 IX86_BUILTIN_PSRAVV32HI,
31976 IX86_BUILTIN_PMADDUBSW512_MASK,
31977 IX86_BUILTIN_PMADDWD512_MASK,
31978 IX86_BUILTIN_PSRLVV32HI,
31979 IX86_BUILTIN_PUNPCKHBW512,
31980 IX86_BUILTIN_PUNPCKHWD512,
31981 IX86_BUILTIN_PUNPCKLBW512,
31982 IX86_BUILTIN_PUNPCKLWD512,
31983 IX86_BUILTIN_PSHUFB512,
31984 IX86_BUILTIN_PSHUFHW512,
31985 IX86_BUILTIN_PSHUFLW512,
31986 IX86_BUILTIN_PSRAWI512,
31987 IX86_BUILTIN_PSRAW512,
31988 IX86_BUILTIN_PSRLWI512,
31989 IX86_BUILTIN_PSRLW512,
31990 IX86_BUILTIN_CVTB2MASK512,
31991 IX86_BUILTIN_CVTW2MASK512,
31992 IX86_BUILTIN_CVTMASK2B512,
31993 IX86_BUILTIN_CVTMASK2W512,
31994 IX86_BUILTIN_PCMPEQB512_MASK,
31995 IX86_BUILTIN_PCMPEQW512_MASK,
31996 IX86_BUILTIN_PCMPGTB512_MASK,
31997 IX86_BUILTIN_PCMPGTW512_MASK,
31998 IX86_BUILTIN_PTESTMB512,
31999 IX86_BUILTIN_PTESTMW512,
32000 IX86_BUILTIN_PTESTNMB512,
32001 IX86_BUILTIN_PTESTNMW512,
32002 IX86_BUILTIN_PSLLVV32HI,
32003 IX86_BUILTIN_PABSB512,
32004 IX86_BUILTIN_PABSW512,
32005 IX86_BUILTIN_BLENDMW512,
32006 IX86_BUILTIN_BLENDMB512,
32007 IX86_BUILTIN_CMPB512,
32008 IX86_BUILTIN_CMPW512,
32009 IX86_BUILTIN_UCMPB512,
32010 IX86_BUILTIN_UCMPW512,
32012 /* Alternate 4 and 8 element gather/scatter for the vectorizer
32013 where all operands are 32-byte or 64-byte wide respectively. */
32014 IX86_BUILTIN_GATHERALTSIV4DF,
32015 IX86_BUILTIN_GATHERALTDIV8SF,
32016 IX86_BUILTIN_GATHERALTSIV4DI,
32017 IX86_BUILTIN_GATHERALTDIV8SI,
32018 IX86_BUILTIN_GATHER3ALTDIV16SF,
32019 IX86_BUILTIN_GATHER3ALTDIV16SI,
32020 IX86_BUILTIN_GATHER3ALTSIV4DF,
32021 IX86_BUILTIN_GATHER3ALTDIV8SF,
32022 IX86_BUILTIN_GATHER3ALTSIV4DI,
32023 IX86_BUILTIN_GATHER3ALTDIV8SI,
32024 IX86_BUILTIN_GATHER3ALTSIV8DF,
32025 IX86_BUILTIN_GATHER3ALTSIV8DI,
32026 IX86_BUILTIN_GATHER3DIV16SF,
32027 IX86_BUILTIN_GATHER3DIV16SI,
32028 IX86_BUILTIN_GATHER3DIV8DF,
32029 IX86_BUILTIN_GATHER3DIV8DI,
32030 IX86_BUILTIN_GATHER3SIV16SF,
32031 IX86_BUILTIN_GATHER3SIV16SI,
32032 IX86_BUILTIN_GATHER3SIV8DF,
32033 IX86_BUILTIN_GATHER3SIV8DI,
32034 IX86_BUILTIN_SCATTERALTSIV8DF,
32035 IX86_BUILTIN_SCATTERALTDIV16SF,
32036 IX86_BUILTIN_SCATTERALTSIV8DI,
32037 IX86_BUILTIN_SCATTERALTDIV16SI,
32038 IX86_BUILTIN_SCATTERDIV16SF,
32039 IX86_BUILTIN_SCATTERDIV16SI,
32040 IX86_BUILTIN_SCATTERDIV8DF,
32041 IX86_BUILTIN_SCATTERDIV8DI,
32042 IX86_BUILTIN_SCATTERSIV16SF,
32043 IX86_BUILTIN_SCATTERSIV16SI,
32044 IX86_BUILTIN_SCATTERSIV8DF,
32045 IX86_BUILTIN_SCATTERSIV8DI,
32048 IX86_BUILTIN_GATHERPFQPD,
32049 IX86_BUILTIN_GATHERPFDPS,
32050 IX86_BUILTIN_GATHERPFDPD,
32051 IX86_BUILTIN_GATHERPFQPS,
32052 IX86_BUILTIN_SCATTERPFDPD,
32053 IX86_BUILTIN_SCATTERPFDPS,
32054 IX86_BUILTIN_SCATTERPFQPD,
32055 IX86_BUILTIN_SCATTERPFQPS,
32058 IX86_BUILTIN_EXP2PD_MASK,
32059 IX86_BUILTIN_EXP2PS_MASK,
32060 IX86_BUILTIN_EXP2PS,
32061 IX86_BUILTIN_RCP28PD,
32062 IX86_BUILTIN_RCP28PS,
32063 IX86_BUILTIN_RCP28SD,
32064 IX86_BUILTIN_RCP28SS,
32065 IX86_BUILTIN_RSQRT28PD,
32066 IX86_BUILTIN_RSQRT28PS,
32067 IX86_BUILTIN_RSQRT28SD,
32068 IX86_BUILTIN_RSQRT28SS,
32071 IX86_BUILTIN_VPMADD52LUQ512,
32072 IX86_BUILTIN_VPMADD52HUQ512,
32073 IX86_BUILTIN_VPMADD52LUQ256,
32074 IX86_BUILTIN_VPMADD52HUQ256,
32075 IX86_BUILTIN_VPMADD52LUQ128,
32076 IX86_BUILTIN_VPMADD52HUQ128,
32077 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
32078 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
32079 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
32080 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
32081 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
32082 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
32085 IX86_BUILTIN_VPMULTISHIFTQB512,
32086 IX86_BUILTIN_VPMULTISHIFTQB256,
32087 IX86_BUILTIN_VPMULTISHIFTQB128,
32088 IX86_BUILTIN_VPERMVARQI512_MASK,
32089 IX86_BUILTIN_VPERMT2VARQI512,
32090 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
32091 IX86_BUILTIN_VPERMI2VARQI512,
32092 IX86_BUILTIN_VPERMVARQI256_MASK,
32093 IX86_BUILTIN_VPERMVARQI128_MASK,
32094 IX86_BUILTIN_VPERMT2VARQI256,
32095 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
32096 IX86_BUILTIN_VPERMT2VARQI128,
32097 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
32098 IX86_BUILTIN_VPERMI2VARQI256,
32099 IX86_BUILTIN_VPERMI2VARQI128,
32101 /* SHA builtins. */
32102 IX86_BUILTIN_SHA1MSG1,
32103 IX86_BUILTIN_SHA1MSG2,
32104 IX86_BUILTIN_SHA1NEXTE,
32105 IX86_BUILTIN_SHA1RNDS4,
32106 IX86_BUILTIN_SHA256MSG1,
32107 IX86_BUILTIN_SHA256MSG2,
32108 IX86_BUILTIN_SHA256RNDS2,
32110 /* CLWB instructions. */
32113 /* PCOMMIT instructions. */
32114 IX86_BUILTIN_PCOMMIT,
32116 /* CLFLUSHOPT instructions. */
32117 IX86_BUILTIN_CLFLUSHOPT,
32119 /* TFmode support builtins. */
32121 IX86_BUILTIN_HUGE_VALQ,
32122 IX86_BUILTIN_FABSQ,
32123 IX86_BUILTIN_COPYSIGNQ,
32125 /* Vectorizer support builtins. */
32126 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
32127 IX86_BUILTIN_CPYSGNPS,
32128 IX86_BUILTIN_CPYSGNPD,
32129 IX86_BUILTIN_CPYSGNPS256,
32130 IX86_BUILTIN_CPYSGNPS512,
32131 IX86_BUILTIN_CPYSGNPD256,
32132 IX86_BUILTIN_CPYSGNPD512,
32133 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
32134 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
32137 /* FMA4 instructions. */
32138 IX86_BUILTIN_VFMADDSS,
32139 IX86_BUILTIN_VFMADDSD,
32140 IX86_BUILTIN_VFMADDPS,
32141 IX86_BUILTIN_VFMADDPD,
32142 IX86_BUILTIN_VFMADDPS256,
32143 IX86_BUILTIN_VFMADDPD256,
32144 IX86_BUILTIN_VFMADDSUBPS,
32145 IX86_BUILTIN_VFMADDSUBPD,
32146 IX86_BUILTIN_VFMADDSUBPS256,
32147 IX86_BUILTIN_VFMADDSUBPD256,
32149 /* FMA3 instructions. */
32150 IX86_BUILTIN_VFMADDSS3,
32151 IX86_BUILTIN_VFMADDSD3,
32153 /* XOP instructions. */
32154 IX86_BUILTIN_VPCMOV,
32155 IX86_BUILTIN_VPCMOV_V2DI,
32156 IX86_BUILTIN_VPCMOV_V4SI,
32157 IX86_BUILTIN_VPCMOV_V8HI,
32158 IX86_BUILTIN_VPCMOV_V16QI,
32159 IX86_BUILTIN_VPCMOV_V4SF,
32160 IX86_BUILTIN_VPCMOV_V2DF,
32161 IX86_BUILTIN_VPCMOV256,
32162 IX86_BUILTIN_VPCMOV_V4DI256,
32163 IX86_BUILTIN_VPCMOV_V8SI256,
32164 IX86_BUILTIN_VPCMOV_V16HI256,
32165 IX86_BUILTIN_VPCMOV_V32QI256,
32166 IX86_BUILTIN_VPCMOV_V8SF256,
32167 IX86_BUILTIN_VPCMOV_V4DF256,
32169 IX86_BUILTIN_VPPERM,
32171 IX86_BUILTIN_VPMACSSWW,
32172 IX86_BUILTIN_VPMACSWW,
32173 IX86_BUILTIN_VPMACSSWD,
32174 IX86_BUILTIN_VPMACSWD,
32175 IX86_BUILTIN_VPMACSSDD,
32176 IX86_BUILTIN_VPMACSDD,
32177 IX86_BUILTIN_VPMACSSDQL,
32178 IX86_BUILTIN_VPMACSSDQH,
32179 IX86_BUILTIN_VPMACSDQL,
32180 IX86_BUILTIN_VPMACSDQH,
32181 IX86_BUILTIN_VPMADCSSWD,
32182 IX86_BUILTIN_VPMADCSWD,
32184 IX86_BUILTIN_VPHADDBW,
32185 IX86_BUILTIN_VPHADDBD,
32186 IX86_BUILTIN_VPHADDBQ,
32187 IX86_BUILTIN_VPHADDWD,
32188 IX86_BUILTIN_VPHADDWQ,
32189 IX86_BUILTIN_VPHADDDQ,
32190 IX86_BUILTIN_VPHADDUBW,
32191 IX86_BUILTIN_VPHADDUBD,
32192 IX86_BUILTIN_VPHADDUBQ,
32193 IX86_BUILTIN_VPHADDUWD,
32194 IX86_BUILTIN_VPHADDUWQ,
32195 IX86_BUILTIN_VPHADDUDQ,
32196 IX86_BUILTIN_VPHSUBBW,
32197 IX86_BUILTIN_VPHSUBWD,
32198 IX86_BUILTIN_VPHSUBDQ,
32200 IX86_BUILTIN_VPROTB,
32201 IX86_BUILTIN_VPROTW,
32202 IX86_BUILTIN_VPROTD,
32203 IX86_BUILTIN_VPROTQ,
32204 IX86_BUILTIN_VPROTB_IMM,
32205 IX86_BUILTIN_VPROTW_IMM,
32206 IX86_BUILTIN_VPROTD_IMM,
32207 IX86_BUILTIN_VPROTQ_IMM,
32209 IX86_BUILTIN_VPSHLB,
32210 IX86_BUILTIN_VPSHLW,
32211 IX86_BUILTIN_VPSHLD,
32212 IX86_BUILTIN_VPSHLQ,
32213 IX86_BUILTIN_VPSHAB,
32214 IX86_BUILTIN_VPSHAW,
32215 IX86_BUILTIN_VPSHAD,
32216 IX86_BUILTIN_VPSHAQ,
32218 IX86_BUILTIN_VFRCZSS,
32219 IX86_BUILTIN_VFRCZSD,
32220 IX86_BUILTIN_VFRCZPS,
32221 IX86_BUILTIN_VFRCZPD,
32222 IX86_BUILTIN_VFRCZPS256,
32223 IX86_BUILTIN_VFRCZPD256,
32225 IX86_BUILTIN_VPCOMEQUB,
32226 IX86_BUILTIN_VPCOMNEUB,
32227 IX86_BUILTIN_VPCOMLTUB,
32228 IX86_BUILTIN_VPCOMLEUB,
32229 IX86_BUILTIN_VPCOMGTUB,
32230 IX86_BUILTIN_VPCOMGEUB,
32231 IX86_BUILTIN_VPCOMFALSEUB,
32232 IX86_BUILTIN_VPCOMTRUEUB,
32234 IX86_BUILTIN_VPCOMEQUW,
32235 IX86_BUILTIN_VPCOMNEUW,
32236 IX86_BUILTIN_VPCOMLTUW,
32237 IX86_BUILTIN_VPCOMLEUW,
32238 IX86_BUILTIN_VPCOMGTUW,
32239 IX86_BUILTIN_VPCOMGEUW,
32240 IX86_BUILTIN_VPCOMFALSEUW,
32241 IX86_BUILTIN_VPCOMTRUEUW,
32243 IX86_BUILTIN_VPCOMEQUD,
32244 IX86_BUILTIN_VPCOMNEUD,
32245 IX86_BUILTIN_VPCOMLTUD,
32246 IX86_BUILTIN_VPCOMLEUD,
32247 IX86_BUILTIN_VPCOMGTUD,
32248 IX86_BUILTIN_VPCOMGEUD,
32249 IX86_BUILTIN_VPCOMFALSEUD,
32250 IX86_BUILTIN_VPCOMTRUEUD,
32252 IX86_BUILTIN_VPCOMEQUQ,
32253 IX86_BUILTIN_VPCOMNEUQ,
32254 IX86_BUILTIN_VPCOMLTUQ,
32255 IX86_BUILTIN_VPCOMLEUQ,
32256 IX86_BUILTIN_VPCOMGTUQ,
32257 IX86_BUILTIN_VPCOMGEUQ,
32258 IX86_BUILTIN_VPCOMFALSEUQ,
32259 IX86_BUILTIN_VPCOMTRUEUQ,
32261 IX86_BUILTIN_VPCOMEQB,
32262 IX86_BUILTIN_VPCOMNEB,
32263 IX86_BUILTIN_VPCOMLTB,
32264 IX86_BUILTIN_VPCOMLEB,
32265 IX86_BUILTIN_VPCOMGTB,
32266 IX86_BUILTIN_VPCOMGEB,
32267 IX86_BUILTIN_VPCOMFALSEB,
32268 IX86_BUILTIN_VPCOMTRUEB,
32270 IX86_BUILTIN_VPCOMEQW,
32271 IX86_BUILTIN_VPCOMNEW,
32272 IX86_BUILTIN_VPCOMLTW,
32273 IX86_BUILTIN_VPCOMLEW,
32274 IX86_BUILTIN_VPCOMGTW,
32275 IX86_BUILTIN_VPCOMGEW,
32276 IX86_BUILTIN_VPCOMFALSEW,
32277 IX86_BUILTIN_VPCOMTRUEW,
32279 IX86_BUILTIN_VPCOMEQD,
32280 IX86_BUILTIN_VPCOMNED,
32281 IX86_BUILTIN_VPCOMLTD,
32282 IX86_BUILTIN_VPCOMLED,
32283 IX86_BUILTIN_VPCOMGTD,
32284 IX86_BUILTIN_VPCOMGED,
32285 IX86_BUILTIN_VPCOMFALSED,
32286 IX86_BUILTIN_VPCOMTRUED,
32288 IX86_BUILTIN_VPCOMEQQ,
32289 IX86_BUILTIN_VPCOMNEQ,
32290 IX86_BUILTIN_VPCOMLTQ,
32291 IX86_BUILTIN_VPCOMLEQ,
32292 IX86_BUILTIN_VPCOMGTQ,
32293 IX86_BUILTIN_VPCOMGEQ,
32294 IX86_BUILTIN_VPCOMFALSEQ,
32295 IX86_BUILTIN_VPCOMTRUEQ,
32297 /* LWP instructions. */
32298 IX86_BUILTIN_LLWPCB,
32299 IX86_BUILTIN_SLWPCB,
32300 IX86_BUILTIN_LWPVAL32,
32301 IX86_BUILTIN_LWPVAL64,
32302 IX86_BUILTIN_LWPINS32,
32303 IX86_BUILTIN_LWPINS64,
32308 IX86_BUILTIN_XBEGIN,
32310 IX86_BUILTIN_XABORT,
32311 IX86_BUILTIN_XTEST,
32314 IX86_BUILTIN_BNDMK,
32315 IX86_BUILTIN_BNDSTX,
32316 IX86_BUILTIN_BNDLDX,
32317 IX86_BUILTIN_BNDCL,
32318 IX86_BUILTIN_BNDCU,
32319 IX86_BUILTIN_BNDRET,
32320 IX86_BUILTIN_BNDNARROW,
32321 IX86_BUILTIN_BNDINT,
32322 IX86_BUILTIN_SIZEOF,
32323 IX86_BUILTIN_BNDLOWER,
32324 IX86_BUILTIN_BNDUPPER,
32326 /* BMI instructions. */
32327 IX86_BUILTIN_BEXTR32,
32328 IX86_BUILTIN_BEXTR64,
32331 /* TBM instructions. */
32332 IX86_BUILTIN_BEXTRI32,
32333 IX86_BUILTIN_BEXTRI64,
32335 /* BMI2 instructions. */
32336 IX86_BUILTIN_BZHI32,
32337 IX86_BUILTIN_BZHI64,
32338 IX86_BUILTIN_PDEP32,
32339 IX86_BUILTIN_PDEP64,
32340 IX86_BUILTIN_PEXT32,
32341 IX86_BUILTIN_PEXT64,
32343 /* ADX instructions. */
32344 IX86_BUILTIN_ADDCARRYX32,
32345 IX86_BUILTIN_ADDCARRYX64,
32347 /* SBB instructions. */
32348 IX86_BUILTIN_SBB32,
32349 IX86_BUILTIN_SBB64,
32351 /* FSGSBASE instructions. */
32352 IX86_BUILTIN_RDFSBASE32,
32353 IX86_BUILTIN_RDFSBASE64,
32354 IX86_BUILTIN_RDGSBASE32,
32355 IX86_BUILTIN_RDGSBASE64,
32356 IX86_BUILTIN_WRFSBASE32,
32357 IX86_BUILTIN_WRFSBASE64,
32358 IX86_BUILTIN_WRGSBASE32,
32359 IX86_BUILTIN_WRGSBASE64,
32361 /* RDRND instructions. */
32362 IX86_BUILTIN_RDRAND16_STEP,
32363 IX86_BUILTIN_RDRAND32_STEP,
32364 IX86_BUILTIN_RDRAND64_STEP,
32366 /* RDSEED instructions. */
32367 IX86_BUILTIN_RDSEED16_STEP,
32368 IX86_BUILTIN_RDSEED32_STEP,
32369 IX86_BUILTIN_RDSEED64_STEP,
32371 /* F16C instructions. */
32372 IX86_BUILTIN_CVTPH2PS,
32373 IX86_BUILTIN_CVTPH2PS256,
32374 IX86_BUILTIN_CVTPS2PH,
32375 IX86_BUILTIN_CVTPS2PH256,
32377 /* MONITORX and MWAITX instrucions. */
32378 IX86_BUILTIN_MONITORX,
32379 IX86_BUILTIN_MWAITX,
32381 /* CFString built-in for darwin */
32382 IX86_BUILTIN_CFSTRING,
32384 /* Builtins to get CPU type and supported features. */
32385 IX86_BUILTIN_CPU_INIT,
32386 IX86_BUILTIN_CPU_IS,
32387 IX86_BUILTIN_CPU_SUPPORTS,
32389 /* Read/write FLAGS register built-ins. */
32390 IX86_BUILTIN_READ_FLAGS,
32391 IX86_BUILTIN_WRITE_FLAGS,
32393 /* PKU instructions. */
32394 IX86_BUILTIN_RDPKRU,
32395 IX86_BUILTIN_WRPKRU,
32400 /* Table for the ix86 builtin decls.  */
32401 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
32403 /* Table of all of the builtin functions that are possible with different ISA's
32404    but are waiting to be built until a function is declared to use that
32406 struct builtin_isa {
32407   const char *name;		/* function name */
32408   enum ix86_builtin_func_type tcode; /* type to use in the declaration */
32409   HOST_WIDE_INT isa;		/* isa_flags this builtin is defined for */
32410   bool const_p;			/* true if the declaration is constant */
32411   bool leaf_p;			/* true if the declaration has leaf attribute */
32412   bool nothrow_p;		/* true if the declaration has nothrow attribute */
32413   bool set_and_not_built_p;	/* true while the builtin is recorded here but
				   its decl has not yet been created; cleared by
				   def_builtin / ix86_add_new_builtins once the
				   decl is built.  */
32416 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
32418 /* Bits that can still enable any inclusion of a builtin.  */
32419 static HOST_WIDE_INT deferred_isa_values = 0;
32421 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
32422    of which isa_flags to use in the ix86_builtins_isa array.  Stores the
32423    function decl in the ix86_builtins array.  Returns the function decl or
32424    NULL_TREE, if the builtin was not added.
32426    If the front end has a special hook for builtin functions, delay adding
32427    builtin functions that aren't in the current ISA until the ISA is changed
32428    with function specific optimization.  Doing so, can save about 300K for the
32429    default compiler.  When the builtin is expanded, check at that time whether
32432    If the front end doesn't have a special hook, record all builtins, even if
32433    it isn't an instruction set in the current ISA in case the user uses
32434    function specific options for a different ISA, so that we don't get scope
32435    errors if a builtin is added in the middle of a function scope.  */
32438 def_builtin (HOST_WIDE_INT mask, const char *name,
32439 	     enum ix86_builtin_func_type tcode,
32440 	     enum ix86_builtins code)
32442   tree decl = NULL_TREE;
  /* Skip builtins that require 64-bit support when targeting 32-bit;
     such a builtin is neither built nor deferred.  */
32444   if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
32446       ix86_builtins_isa[(int) code].isa = mask;
32448       /* OPTION_MASK_ISA_AVX512VL has special meaning. Despite of generic case,
32449 	 where any bit set means that built-in is enable, this bit must be *and-ed*
32450 	 with another one. E.g.: OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL
32451 	 means that *both* cpuid bits must be set for the built-in to be available.
32452 	 Handle this here.  */
32453       if (mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
32454 	mask &= ~OPTION_MASK_ISA_AVX512VL;
      /* The 64-bit bit only gated admission above; drop it before testing
	 against the currently enabled ISA flags.  */
32456       mask &= ~OPTION_MASK_ISA_64BIT;
32458 	  || (mask & ix86_isa_flags) != 0
32459 	  || (lang_hooks.builtin_function
32460 	      == lang_hooks.builtin_function_ext_scope))
	  /* Build the decl now: the builtin is enabled by the current ISA
	     (or the front end has no ext-scope hook to defer through).  */
32463 	  tree type = ix86_get_builtin_func_type (tcode);
32464 	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32466 	  ix86_builtins[(int) code] = decl;
32467 	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
32471 	  /* Just a MASK where set_and_not_built_p == true can potentially
32472 	     include a builtin.  */
32473 	  deferred_isa_values |= mask;
32474 	  ix86_builtins[(int) code] = NULL_TREE;
32475 	  ix86_builtins_isa[(int) code].tcode = tcode;
32476 	  ix86_builtins_isa[(int) code].name = name;
32477 	  ix86_builtins_isa[(int) code].leaf_p = false;
32478 	  ix86_builtins_isa[(int) code].nothrow_p = false;
32479 	  ix86_builtins_isa[(int) code].const_p = false;
32480 	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
32487 /* Like def_builtin, but also marks the function decl "const".
   If the builtin was built immediately, TREE_READONLY is set on the
   decl; if it was deferred, const_p is recorded so that
   ix86_add_new_builtins can set TREE_READONLY later.  */
32490 def_builtin_const (HOST_WIDE_INT mask, const char *name,
32491 		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
32493   tree decl = def_builtin (mask, name, tcode, code);
32495     TREE_READONLY (decl) = 1;
32497     ix86_builtins_isa[(int) code].const_p = true;
32502 /* Add any new builtin functions for a given ISA that may not have been
32503    declared.  This saves a bit of space compared to adding all of the
32504    declarations to the tree, even if we didn't use them.  */
32507 ix86_add_new_builtins (HOST_WIDE_INT isa)
  /* Fast exit: no deferred builtin depends on any bit of ISA.  */
32509   if ((isa & deferred_isa_values) == 0)
32512   /* Bits in ISA value can be removed from potential isa values.  */
32513   deferred_isa_values &= ~isa;
  /* Suppress any active target pragma while declaring the builtins, and
     restore it afterwards, so the pragma does not affect the new decls.  */
32516   tree saved_current_target_pragma = current_target_pragma;
32517   current_target_pragma = NULL_TREE;
32519   for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
32521       if ((ix86_builtins_isa[i].isa & isa) != 0
32522 	  && ix86_builtins_isa[i].set_and_not_built_p)
32526 	  /* Don't define the builtin again.  */
32527 	  ix86_builtins_isa[i].set_and_not_built_p = false;
32529 	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
32530 	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
32531 						 type, i, BUILT_IN_MD, NULL,
	  /* Transfer the attributes recorded at def_builtin time onto the
	     freshly built decl.  */
32534 	  ix86_builtins[i] = decl;
32535 	  if (ix86_builtins_isa[i].const_p)
32536 	    TREE_READONLY (decl) = 1;
32537 	  if (ix86_builtins_isa[i].leaf_p)
32538 	    DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
32540 	  if (ix86_builtins_isa[i].nothrow_p)
32541 	    TREE_NOTHROW (decl) = 1;
32545   current_target_pragma = saved_current_target_pragma;
32548 /* Bits for builtin_description.flag.  */
32550 /* Set when we don't support the comparison natively, and should
32551    swap_comparison in order to support it.  */
32552 #define BUILTIN_DESC_SWAP_OPERANDS 1
   /* One entry per builtin in the bdesc_* tables below: the enabling ISA
      mask, the insn pattern to expand to, the user-visible name, the
      builtin code, and the RTL comparison code (UNKNOWN when unused).  */
32554 struct builtin_description
32556   const HOST_WIDE_INT mask;
32557   const enum insn_code icode;
32558   const char *const name;
32559   const enum ix86_builtins code;
32560   const enum rtx_code comparison;
/* SSE/SSE2 ordered (comi*) and unordered (ucomi*) scalar floating-point
   compare builtins.  SSE entries operate on scalar float (SS suffix),
   SSE2 entries on scalar double (SD suffix).  */
32564 static const struct builtin_description bdesc_comi[] =
32566   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
32567   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
32568   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
32569   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
32570   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
32571   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
32572   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
32573   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
32574   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
32575   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
32576   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
32577   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
32578   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
32579   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
32580   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
32581   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
32582   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
32583   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
32584   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
32585   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
32586   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
32587   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
32588   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
32589   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* SSE4.2 packed-compare explicit-length string builtins (pcmpestri/
   pcmpestrm).  Entries with a CC*mode in the last field return the
   corresponding EFLAGS condition instead of the index/mask result.  */
32592 static const struct builtin_description bdesc_pcmpestr[] =
32595   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
32596   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
32597   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
32598   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
32599   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
32600   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
32601   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
/* SSE4.2 packed-compare implicit-length string builtins (pcmpistri/
   pcmpistrm).  Same layout as bdesc_pcmpestr: a CC*mode in the last
   field selects an EFLAGS-condition result.  */
32604 static const struct builtin_description bdesc_pcmpistr[] =
32607   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
32608   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
32609   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
32610   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
32611   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
32612   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
32613   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
32616 /* Special builtins with variable number of arguments. */
32617 static const struct builtin_description bdesc_special_args[] =
32619 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
32620 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
32621 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
32623 /* 80387 (for use internally for atomic compound assignment). */
32624 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
32625 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
32626 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
32627 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
32630 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
32633 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
32635 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
32636 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
32637 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
32638 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32639 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32640 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32641 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32642 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32643 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32645 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
32646 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
32647 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32648 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32649 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32650 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32651 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32652 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32655 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
32656 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
32657 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
32659 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
32660 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
32661 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
32662 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
32664 /* SSE or 3DNow!A */
32665 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
32666 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
32669 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
32670 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
32671 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
32672 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
32673 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
32674 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
32675 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
32676 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
32677 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
32678 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
32680 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
32681 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
32684 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
32687 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
32690 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
32691 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
32694 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
32695 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
32697 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
32698 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
32699 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
32700 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
32701 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
32703 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
32704 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
32705 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
32706 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
32707 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
32708 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
32709 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
32711 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
32712 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
32713 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
32715 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
32716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
32717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
32718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
32719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
32720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
32721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
32722 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
32725 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
32726 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
32727 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
32728 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
32729 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
32730 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
32731 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
32732 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
32733 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
32736 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
32737 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
32738 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
32739 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
32740 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
32741 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
32742 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
32743 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
32744 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
32745 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
32746 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
32747 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
32748 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
32749 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
32750 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
32751 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
32752 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
32753 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
32754 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
32755 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
32756 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
32757 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
32758 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
32759 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
32760 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
32761 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
32762 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
32763 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
32764 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
32765 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
32766 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
32767 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
32768 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
32769 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
32770 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
32771 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
32772 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
32773 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
32774 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
32775 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
32776 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
32777 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
32778 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
32779 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
32780 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
32781 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
32782 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
32784 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
32785 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
32786 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
32787 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
32788 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
32789 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
32792 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
32793 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
32794 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
32795 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
32796 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
32797 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
32798 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
32799 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
32802 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
32803 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
32804 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
32807 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI },
32808 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI },
32809 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_USI },
32810 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_UDI },
32813 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_UHI },
32814 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_UQI },
32815 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI },
32816 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_UHI },
32817 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
32818 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
32819 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
32820 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
32821 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
32822 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
32823 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
32824 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
32825 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
32826 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
32827 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
32828 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
32829 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
32830 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
32831 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
32832 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
32833 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
32834 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
32835 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
32836 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
32837 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
32838 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
32839 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
32840 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
32841 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
32842 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
32843 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
32844 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
32845 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
32846 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
32847 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
32848 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
32849 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_UHI },
32850 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_UQI },
32851 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_USI },
32852 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_UHI },
32853 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
32854 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
32855 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
32856 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
32857 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
32858 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
32859 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
32860 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
32861 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
32862 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
32863 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
32864 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
32865 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
32866 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
32867 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
32868 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
32869 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
32870 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
32871 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
32872 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
32873 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
32874 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
32875 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
32876 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
32877 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
32878 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
32879 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
32880 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
32881 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
32882 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
32883 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
32884 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
32885 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
32886 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
32887 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
32888 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
32889 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
32890 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
32891 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
32892 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
32893 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
32894 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
32895 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
32896 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
32897 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
32898 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
32899 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
32900 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
32901 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
32902 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
32903 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
32904 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
32905 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
32906 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
32909 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
32911 /* RDPKRU and WRPKRU. */
32912 { OPTION_MASK_ISA_PKU, CODE_FOR_rdpkru, "__builtin_ia32_rdpkru", IX86_BUILTIN_RDPKRU, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
32913 { OPTION_MASK_ISA_PKU, CODE_FOR_wrpkru, "__builtin_ia32_wrpkru", IX86_BUILTIN_WRPKRU, UNKNOWN, (int) VOID_FTYPE_UNSIGNED }
32916 /* Builtins with variable number of arguments. */
32917 static const struct builtin_description bdesc_args[] =
32919 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
32920 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
32921 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
32922 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
32923 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
32924 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
32925 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
32928 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32929 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32930 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32931 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32932 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32933 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32935 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32936 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32937 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32938 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32939 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32940 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32941 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32942 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32944 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32945 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32947 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32948 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32949 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32950 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32952 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32953 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32954 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32955 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32956 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32957 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32959 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32960 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32961 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32962 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32963 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
32964 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
32966 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
32967 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
32968 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
32970 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
32972 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
32973 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
32974 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
32975 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
32976 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
32977 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
32979 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
32980 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
32981 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
32982 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
32983 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
32984 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
32986 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
32987 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
32988 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
32989 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
32992 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
32993 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
32994 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
32995 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
32997 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32998 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32999 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
33000 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
33001 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
33002 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
33003 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
33004 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
33005 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
33006 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
33007 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
33008 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
33009 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
33010 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
33011 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33014 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
33015 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
33016 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
33017 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
33018 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
33019 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
33022 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
33023 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33024 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33025 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33026 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33027 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33028 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
33029 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
33030 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
33031 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
33032 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
33033 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
33035 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33037 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33038 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33039 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33040 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33041 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33042 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33043 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33044 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33046 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
33047 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
33048 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
33049 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
33050 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
33051 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
33052 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
33053 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
33054 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
33055 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
33056 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
33057 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
33058 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
33059 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
33060 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
33061 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
33062 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
33063 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
33064 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
33065 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
33067 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33068 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33069 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33070 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33072 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33073 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33074 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33075 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33077 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33079 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33080 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33081 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33082 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33083 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33085 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
33086 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
33087 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
33089 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
33091 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
33092 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
33093 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
33095 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
33096 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
33098 /* SSE MMX or 3DNow!A */
33099 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33100 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33101 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33103 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33104 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33105 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33106 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33108 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
33109 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
33111 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
33114 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33116 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
33117 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
33118 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
33119 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
33120 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
33122 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
33123 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
33124 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
33125 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
33126 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
33128 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
33130 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
33131 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
33132 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
33133 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
33135 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
33136 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
33137 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
33139 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33140 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33141 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33142 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33143 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33144 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33145 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33146 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33148 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
33149 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
33150 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
33151 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
33152 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
33153 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
33154 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
33155 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
33156 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
33157 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
33158 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
33159 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
33160 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
33161 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
33162 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
33163 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
33164 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
33165 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
33166 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
33167 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
33169 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33170 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33171 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33172 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33174 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33175 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33176 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33177 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33179 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33181 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33182 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33183 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33185 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
33187 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33188 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33189 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33190 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33191 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33192 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33193 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33194 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33196 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33197 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33198 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33199 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33200 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33201 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33202 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33203 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33205 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33206 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
33208 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33209 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33210 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33211 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33213 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33214 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33216 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33217 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33218 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33219 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33220 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33221 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33223 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33224 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33225 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33226 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33228 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33229 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33230 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33231 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33232 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33233 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33234 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33235 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33237 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
33238 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
33239 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
33241 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33242 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
33244 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
33245 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
33247 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
33249 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
33250 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
33251 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
33252 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
33254 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
33255 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
33256 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
33257 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
33258 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
33259 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
33260 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
33262 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
33263 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
33264 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
33265 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
33266 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
33267 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
33268 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
33270 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
33271 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
33272 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
33273 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
33275 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
33276 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
33277 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
33279 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
33281 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
33284 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
33285 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
33288 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
33289 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33291 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33292 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33293 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33294 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33295 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33296 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33299 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
33300 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
33301 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
33302 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
33303 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
33304 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
33306 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33307 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33308 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33309 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
33310 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33311 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33312 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33313 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33314 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33315 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
33316 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33317 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33318 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
33319 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
33320 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33321 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33322 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33323 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33324 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33325 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33326 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33327 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33328 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33329 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
33332 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
33333 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
33336 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33337 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33338 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
33339 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
33340 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33341 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33342 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33343 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
33344 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
33345 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
33347 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
33348 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
33349 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
33350 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
33351 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
33352 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
33353 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
33354 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
33355 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
33356 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
33357 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
33358 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
33359 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
33361 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
33362 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33363 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33364 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33365 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33366 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33367 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33368 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33369 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33370 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33371 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
33372 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33375 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
33376 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
33377 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33378 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33380 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
33381 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
33382 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
33383 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
33385 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
33386 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
33388 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
33389 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
33391 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
33392 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
33393 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
33394 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
33396 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
33397 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
33399 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33400 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
33402 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
33403 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
33404 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
33407 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33408 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
33409 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
33410 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33411 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33414 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
33415 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
33416 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
33417 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33420 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
33421 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
33423 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33424 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33425 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33426 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33429 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
33432 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33433 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33434 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33435 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33436 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33437 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33438 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33439 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33440 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33441 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33442 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33443 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33444 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33445 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33446 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33447 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33448 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33449 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33450 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33451 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33452 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33453 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33454 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33455 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33456 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33457 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33459 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
33460 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
33461 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
33462 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
33464 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
33465 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33466 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
33467 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
33468 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33469 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
33470 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33471 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33472 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33473 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33474 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33475 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
33476 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33477 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
33478 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
33479 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
33480 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
33481 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
33482 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
33483 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
33484 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
33485 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
33486 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
33487 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
33488 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
33489 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33490 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
33491 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
33492 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
33493 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
33494 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
33495 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
33496 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
33497 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
33499 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33500 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33501 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
33503 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
33504 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33505 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33506 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33507 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33509 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33511 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
33512 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
33514 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
33515 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
33516 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
33517 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
33519 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
33520 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
33522 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
33523 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
33525 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
33526 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
33527 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
33528 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
33530 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
33531 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
33533 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33534 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
33536 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33537 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33538 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33539 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33541 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
33542 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
33543 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
33544 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
33545 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
33546 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
33548 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
33549 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
33550 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
33551 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
33552 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
33553 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
33554 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
33555 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
33556 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
33557 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
33558 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
33559 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
33560 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
33561 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
33562 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
33564 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
33565 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
33567 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33568 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33570 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
33573 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
33574 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
33575 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
33576 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
33577 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
33578 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
33579 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
33580 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
33581 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33582 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33583 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33584 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33585 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33586 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33587 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33588 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33589 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
33590 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33591 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33592 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33593 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33594 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
33595 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
33596 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33597 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33598 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33599 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33600 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33601 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33602 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33603 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33604 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33605 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33606 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33607 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33608 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33609 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33610 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
33611 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
33612 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33613 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33614 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33615 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33616 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33617 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33618 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33619 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33620 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33621 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33622 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33623 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33624 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
33625 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
33626 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
33627 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
33628 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
33629 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
33630 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
33631 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
33632 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
33633 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
33634 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
33635 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
33636 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
33637 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
33638 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33639 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33640 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33641 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33642 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33643 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
33644 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33645 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
33646 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33647 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
33648 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
33649 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
33650 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33651 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33652 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33653 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
33654 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
33655 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
33656 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
33657 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
33658 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
33659 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
33660 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
33661 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
33662 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
33663 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
33664 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
33665 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
33666 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
33667 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
33668 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
33669 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
33670 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
33671 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33672 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33673 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33674 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33675 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33676 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33677 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33678 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33679 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33680 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33681 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33682 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33683 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33684 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33685 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33686 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33687 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33688 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33689 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
33690 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
33691 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
33692 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
33693 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
33694 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
33695 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
33696 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
33697 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
33698 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
33699 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
33700 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
33701 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
33702 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33703 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
33704 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
33705 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
33706 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
33707 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
33708 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
33709 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33710 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33711 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33712 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33713 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33714 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33715 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33716 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33717 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33718 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33720 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
33723 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33724 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33725 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
33728 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33729 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33732 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
33733 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
33734 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
33735 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
33738 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33739 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33740 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33741 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33742 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33743 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33746 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
33747 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
33748 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
33749 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
33750 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
33751 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
33752 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI },
33753 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI },
33754 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33755 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33756 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33757 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33758 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
33759 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_UQI },
33760 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
33761 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_UQI },
33762 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI },
33763 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
33764 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI },
33765 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI },
33766 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33767 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33768 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI },
33769 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_UHI },
33770 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI },
33771 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
33772 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33773 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33774 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33775 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33776 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_UQI },
33777 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_UQI },
33778 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_UQI },
33779 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_UQI },
33780 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI },
33781 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI },
33782 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI },
33783 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI },
33784 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33785 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33786 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33787 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33788 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33789 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33790 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33791 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33792 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33793 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33794 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33795 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33796 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33797 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33798 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33799 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
33800 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_UHI },
33801 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_UQI },
33802 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_UHI },
33803 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI },
33804 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_UQI },
33805 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
33806 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
33807 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
33808 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
33809 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33810 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33811 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33812 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33813 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33814 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33818 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33819 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33823 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
33824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
33825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
33826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
33827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
33828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
33829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
33830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
33831 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
33832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
33833 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI },
33834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI },
33835 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI },
33836 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI },
33837 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI },
33838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
33839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
33840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
33841 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
33842 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
33843 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI },
33844 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI },
33845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI },
33846 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI },
33847 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI },
33848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI },
33849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI },
33851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33862 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
33863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33864 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
33865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33866 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33867 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33868 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
33869 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33870 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
33871 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33872 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33873 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33874 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
33875 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33876 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
33877 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33878 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33879 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33880 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33881 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33882 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
33883 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
33884 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
33885 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
33886 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33887 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33888 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33889 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33890 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33891 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33892 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33893 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33894 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33895 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33896 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33897 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33898 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33899 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33900 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI },
33901 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI },
33902 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI },
33903 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI },
33904 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI },
33905 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI },
33906 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI },
33907 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI },
33908 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
33909 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
33910 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
33911 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
33912 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33913 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33914 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33915 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
33917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
33920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
33921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
33923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI },
33924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
33925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
33926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI },
33929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI },
33930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI },
33931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI },
33932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
33935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
33937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI },
33939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI },
33940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI },
33941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI },
33943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
33944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
33945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
33946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
33947 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
33948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
33949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
33950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
33952 /* Mask arithmetic operations */
33953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) UHI_FTYPE_UHI },
33956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) UHI_FTYPE_UHI },
33965 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33966 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33967 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33968 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
33969 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33970 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33971 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
33974 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT },
33975 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT },
33976 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
33977 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
33978 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
33979 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
33980 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
33981 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
33982 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
33983 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
33984 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
33985 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
33986 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
33987 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
33988 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33989 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33990 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33991 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33992 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33993 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33994 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33995 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33996 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33997 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33998 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33999 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34000 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34001 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34002 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34003 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34004 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34005 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34006 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34007 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34008 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34009 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34010 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34011 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34012 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34013 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34014 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34015 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34016 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34017 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34018 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34019 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34020 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
34021 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
34022 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34023 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_UQI },
34024 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_UQI },
34025 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
34026 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
34027 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_UQI },
34028 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_UQI },
34029 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI },
34030 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI },
34031 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
34032 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
34033 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
34034 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
34035 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
34036 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
34037 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
34038 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
34039 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
34040 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
34041 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
34042 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
34043 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
34044 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
34045 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
34046 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
34047 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
34048 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
34049 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
34050 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
34051 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
34052 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
34053 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
34054 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
34055 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_UQI },
34056 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_UQI },
34057 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_UQI },
34058 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_UQI },
34059 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_UQI },
34060 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_UQI },
34061 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_UQI },
34062 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_UQI },
34063 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_UQI },
34064 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_UQI },
34065 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_USI },
34066 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_USI },
34067 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
34068 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_UHI },
34069 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_UHI },
34070 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_UHI },
34071 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
34072 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_UQI },
34073 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
34074 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_UQI },
34075 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34076 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_UQI },
34077 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_UQI },
34078 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_UQI },
34079 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34080 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_UQI },
34081 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
34082 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34083 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_UQI },
34084 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_UQI },
34085 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_UQI },
34086 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI },
34087 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI },
34088 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_UHI },
34089 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_UQI },
34090 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_UQI },
34091 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_UQI },
34092 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_UQI },
34093 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_UQI },
34094 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_UQI },
34095 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_UQI },
34096 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_UQI },
34097 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_UQI },
34098 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_UQI },
34099 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_UQI },
34100 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_UHI },
34101 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_UQI },
34102 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_UQI },
34103 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_UQI },
34104 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_UQI },
34105 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_UQI },
34106 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_UQI },
34107 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_UQI },
34108 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_UQI },
34109 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_UQI },
34110 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_UQI },
34111 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_UQI },
34112 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34113 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
34114 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
34115 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
34116 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34117 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34118 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34119 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34120 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34121 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34122 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34123 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34124 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34125 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34126 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34127 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34128 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34129 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34130 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34131 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34132 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34133 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34134 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34135 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34136 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34137 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34138 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34139 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34140 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34141 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34142 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34143 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34144 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34145 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34146 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34147 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34148 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34149 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34150 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34151 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34152 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34153 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34154 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34155 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34156 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34157 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34158 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34159 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34160 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34161 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34162 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34163 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34164 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34165 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34166 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34167 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34168 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34169 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34170 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
34171 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI },
34172 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI },
34173 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
34174 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
34175 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
34176 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
34177 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
34178 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
34179 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
34180 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
34181 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
34182 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
34183 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
34184 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
34185 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
34186 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
34187 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
34188 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
34189 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
34190 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
34191 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
34192 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
34193 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
34194 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
34195 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
34196 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
34197 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
34198 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
34199 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
34200 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
34201 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
34202 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
34203 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
34204 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
34205 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
34206 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
34207 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
34208 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
34209 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
34210 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
34211 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI },
34212 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
34213 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI },
34214 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34215 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34216 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34217 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34218 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI },
34219 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI },
34220 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI },
34221 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI },
34222 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI },
34223 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI },
34224 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI },
34225 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI },
34226 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34227 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34228 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34229 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34230 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask , "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34231 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34232 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34233 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask" , IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34234 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask" , IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34235 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34236 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask" , IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34237 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34238 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34239 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34240 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34241 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34242 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34243 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34244 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34245 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34246 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34247 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34248 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34249 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34250 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34251 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34252 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34253 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34254 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK , UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34255 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
34256 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34257 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
34258 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34259 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
34260 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34261 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34262 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34263 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
34264 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34265 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34266 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34267 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
34268 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34269 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34270 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34271 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34272 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34273 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
34274 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34275 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34276 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34277 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
34278 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34279 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34280 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34281 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34282 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34283 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34284 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34285 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34286 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34287 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34288 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34289 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34290 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34291 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34292 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_USI },
34293 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_UHI },
34294 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_USI },
34295 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_UHI },
34296 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
34297 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34298 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
34299 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
34300 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI },
34301 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI },
34302 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI },
34303 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI },
34304 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI },
34305 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI },
34306 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI },
34307 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI },
34308 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34309 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34310 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34311 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34312 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34313 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34314 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34315 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34316 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34317 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34318 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34319 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34320 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34321 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34322 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34323 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34324 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34325 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34326 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34327 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34328 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34329 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34330 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34331 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34332 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34333 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34334 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34335 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34336 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34337 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34338 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34339 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34340 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34341 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34342 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34343 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34344 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34345 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34346 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34347 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34348 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34349 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34350 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34351 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34352 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34353 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34354 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34355 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34356 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI },
34357 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI },
34358 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34359 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34360 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_UHI },
34361 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_UQI },
34362 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_UQI },
34363 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_UQI },
34364 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34365 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34366 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
34367 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
34368 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
34369 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
34370 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
34371 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
34372 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
34373 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
34374 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
34375 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
34376 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34377 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
34378 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34379 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34380 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34381 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34382 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34383 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34384 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_UQI },
34385 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_UQI },
34386 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_UQI },
34387 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_UQI },
34388 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_UQI },
34389 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_UQI },
34390 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_UQI },
34391 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_UQI },
34392 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34393 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34394 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34395 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34396 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34397 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34398 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_UQI },
34399 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_UQI },
34400 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_UQI },
34401 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_UQI },
34402 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
34403 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
34404 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34405 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34406 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34407 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34408 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34409 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34410 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_UQI },
34411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_UQI },
34412 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_UQI },
34413 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_UQI },
34414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_UQI },
34415 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_UQI },
34416 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34417 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34418 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34419 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34420 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34421 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34422 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34423 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34424 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
34425 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI },
34426 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
34427 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI },
34428 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34429 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34430 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34431 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34432 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34433 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34434 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34435 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34436 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34437 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34438 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34439 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34440 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34441 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34442 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34443 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34444 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34447 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34448 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34449 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
34450 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34451 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34452 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34453 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
34454 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34455 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34456 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34457 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34458 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34459 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34460 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34461 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34462 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34463 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34464 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_UQI },
34465 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_UQI },
34466 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
34467 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_UQI },
34468 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_UQI },
34469 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
34470 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) UHI_FTYPE_V16QI },
34471 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) USI_FTYPE_V32QI },
34472 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) UQI_FTYPE_V8HI },
34473 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) UHI_FTYPE_V16HI },
34474 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) UQI_FTYPE_V4SI },
34475 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) UQI_FTYPE_V8SI },
34476 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) UQI_FTYPE_V2DI },
34477 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) UQI_FTYPE_V4DI },
34478 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_UHI },
34479 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_USI },
34480 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_UQI },
34481 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_UHI },
34482 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_UQI },
34483 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_UQI },
34484 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_UQI },
34485 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_UQI },
34486 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
34487 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
34488 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
34489 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
34490 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
34491 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
34492 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
34493 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
34494 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
34495 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
34496 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
34497 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
34498 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
34499 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
34500 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
34501 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
34502 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
34503 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
34504 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
34505 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
34506 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
34507 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
34508 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
34509 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
34510 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
34511 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
34512 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
34513 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
34514 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
34515 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
34516 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
34517 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
34518 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_UQI },
34519 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_UQI },
34520 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_UHI },
34521 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_UHI },
34522 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34523 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34524 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34525 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34526 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34527 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34528 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34529 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34530 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34531 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34532 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34533 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34534 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34535 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34536 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34537 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34538 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34539 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34540 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34541 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34542 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34543 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34544 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34545 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34546 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34547 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34548 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34549 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34550 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34551 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34552 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34553 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34554 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34555 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34556 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34557 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34558 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34559 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34560 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34561 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34562 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34563 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34564 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34565 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34566 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34567 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34568 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34569 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34570 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34571 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34572 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34573 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34574 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34575 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34576 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34577 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34578 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34579 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34580 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34581 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34582 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34583 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34584 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34585 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34586 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34587 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34588 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34589 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34590 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34591 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34592 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34593 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34594 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI },
34595 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI },
34596 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI },
34597 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI },
34598 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_UQI },
34599 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_UQI },
34600 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_UQI },
34601 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_UQI },
34602 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34603 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34604 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34605 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34606 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34607 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34608 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34609 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34610 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34611 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34612 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34613 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34614 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34615 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34616 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34617 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34618 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34619 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34620 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_UHI },
34621 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_UQI },
34622 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_UHI },
34623 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_UQI },
34624 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34625 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34626 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34627 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34628 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
34629 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
34630 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34631 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
34632 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
34633 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
34634 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
34635 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_UQI },
34636 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_UQI },
34637 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
34638 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
34639 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
34640 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
34641 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34642 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
34643 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34644 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34645 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34646 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34647 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34648 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34649 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34650 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34651 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
34652 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
34653 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
34654 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
34655 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34656 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34657 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_UQI },
34658 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_UQI },
34659 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_UQI },
34660 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_UQI },
34661 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_UQI },
34662 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_UQI },
34663 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34664 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34665 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34666 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_INT_UQI },
34667 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_INT_UQI },
34668 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_INT_UQI },
34669 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_INT_UQI },
34670 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_INT_USI },
34671 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_INT_UHI },
34672 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_INT_USI },
34673 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_INT_UHI },
34674 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_UQI },
34675 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_UQI },
34676 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_INT_UQI },
34677 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_INT_UQI },
34678 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_INT_UQI },
34679 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_INT_UQI },
34680 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_INT_UHI },
34681 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_INT_UQI },
34682 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_INT_UHI },
34683 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_INT_UQI },
34684 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI },
34685 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI },
34688 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
34689 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
34690 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI },
34691 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI },
34692 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_UHI },
34693 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_UHI },
34694 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_UQI },
34695 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_UQI },
34696 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_UQI },
34697 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_UQI },
34698 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
34699 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI },
34700 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34701 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
34702 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34703 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
34704 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34705 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
34706 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34707 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI},
34708 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34709 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI },
34710 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI },
34711 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI },
34712 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI },
34713 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_UQI },
34714 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_UHI },
34715 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) UHI_FTYPE_V16SI },
34716 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) UQI_FTYPE_V8DI },
34717 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_UHI },
34718 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_UQI },
34721 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) USI_FTYPE_USI_USI },
34722 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) UDI_FTYPE_UDI_UDI },
34723 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI },
34724 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
34725 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
34726 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI },
34727 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
34728 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT },
34729 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
34730 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
34731 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
34732 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI },
34733 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_UDI },
34734 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_UDI },
34735 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_USI },
34736 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_USI },
34737 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI },
34738 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI },
34739 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34740 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34741 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34742 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34743 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34744 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34745 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34746 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34747 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34748 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34749 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34750 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34751 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34752 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34753 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34754 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34755 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34756 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34757 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34758 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34759 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34760 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34761 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34762 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34763 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34764 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34765 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
34766 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
34767 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
34768 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34769 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask" , IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34770 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask" , IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34771 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34772 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34773 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
34774 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI },
34775 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI },
34776 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34777 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_USI },
34778 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_UHI },
34779 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34780 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34781 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34782 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34783 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34784 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34785 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34786 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34787 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34788 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
34789 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34790 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
34791 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) UDI_FTYPE_V64QI },
34792 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) USI_FTYPE_V32HI },
34793 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_UDI },
34794 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_USI },
34795 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
34796 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
34797 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
34798 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
34799 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
34800 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
34801 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
34802 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
34803 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34804 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
34805 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
34806 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
34807 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
34808 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI },
34809 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI },
34810 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI },
34811 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI },
34814 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34815 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34816 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34817 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34818 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34819 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34820 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34821 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34822 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34823 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34824 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34825 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34828 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34829 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34830 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34831 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34832 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34833 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34834 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34835 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34836 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34837 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34838 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34839 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34840 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34841 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34842 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34845 /* Builtins with rounding support. */
34846 static const struct builtin_description bdesc_round_args[] =
34849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) UQI_FTYPE_V8DF_V8DF_INT_UQI_INT },
34854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) UHI_FTYPE_V16SF_V16SF_INT_UHI_INT },
34855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI_INT },
34856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI_INT },
34857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
34858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
34859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
34860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
34861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
34862 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
34863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
34864 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
34865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
34866 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
34867 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
34868 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
34869 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
34870 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
34871 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
34872 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
34873 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
34874 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
34875 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
34876 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
34877 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
34878 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
34879 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
34880 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34881 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34882 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34883 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34884 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
34885 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
34886 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
34887 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
34888 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
34889 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
34890 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
34891 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
34892 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34893 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34894 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34895 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34896 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
34897 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
34898 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
34899 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
34900 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34901 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34902 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34903 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34904 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34905 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34906 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34907 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34908 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34909 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34910 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34911 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34912 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
34913 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
34914 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
34915 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
34916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
34929 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
34930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
34931 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
34932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
34933 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
34934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
34935 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
34936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
34937 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
34938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
34939 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
34940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
34941 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
34942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
34943 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
34944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
34951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
34952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34970 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34971 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34972 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34973 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34974 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34975 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34976 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34977 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34978 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34979 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34982 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
34983 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
34984 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
34985 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
34986 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
34987 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
34988 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
34989 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
34990 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
34991 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
34992 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
34993 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
34994 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
34995 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
34996 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
34997 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
/* Builtins for MPX.  */
35001 static const struct builtin_description bdesc_mpx[] =
35003 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
35004 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
35005 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
35008 /* Const builtins for MPX. */
35009 static const struct builtin_description bdesc_mpx_const[] =
35011 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
35012 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
35013 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
35014 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
35015 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
35016 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
35017 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
35018 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
35021 /* FMA4 and XOP. */
35022 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
35023 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
35024 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
35025 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
35026 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
35027 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
35028 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
35029 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
35030 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
35031 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
35032 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
35033 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
35034 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
35035 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
35036 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
35037 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
35038 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
35039 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
35040 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
35041 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
35042 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
35043 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
35044 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
35045 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
35046 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
35047 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
35048 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
35049 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
35050 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
35051 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
35052 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
35053 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
35054 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
35055 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
35056 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
35057 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
35058 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
35059 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
35060 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
35061 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
35062 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
35063 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
35064 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
35065 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
35066 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
35067 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
35068 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
35069 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
35070 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
35071 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
35072 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
35073 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
35075 static const struct builtin_description bdesc_multi_arg[] =
35077 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
35078 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
35079 UNKNOWN, (int)MULTI_ARG_3_SF },
35080 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
35081 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
35082 UNKNOWN, (int)MULTI_ARG_3_DF },
35084 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
35085 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
35086 UNKNOWN, (int)MULTI_ARG_3_SF },
35087 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
35088 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
35089 UNKNOWN, (int)MULTI_ARG_3_DF },
35091 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
35092 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
35093 UNKNOWN, (int)MULTI_ARG_3_SF },
35094 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
35095 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
35096 UNKNOWN, (int)MULTI_ARG_3_DF },
35097 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
35098 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
35099 UNKNOWN, (int)MULTI_ARG_3_SF2 },
35100 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
35101 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
35102 UNKNOWN, (int)MULTI_ARG_3_DF2 },
35104 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
35105 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
35106 UNKNOWN, (int)MULTI_ARG_3_SF },
35107 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
35108 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
35109 UNKNOWN, (int)MULTI_ARG_3_DF },
35110 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
35111 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
35112 UNKNOWN, (int)MULTI_ARG_3_SF2 },
35113 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
35114 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
35115 UNKNOWN, (int)MULTI_ARG_3_DF2 },
35117 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
35118 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
35119 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
35120 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
35121 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
35122 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
35123 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
35125 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
35126 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
35127 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
35128 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
35129 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
35130 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
35131 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
35133 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
35135 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
35136 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
35137 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
35138 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
35139 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
35140 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
35141 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
35142 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
35143 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
35144 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
35145 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
35146 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
35148 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
35149 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
35150 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
35151 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
35152 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
35153 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
35154 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
35155 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
35156 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
35157 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
35158 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
35159 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
35160 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
35161 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
35162 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
35163 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
35165 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
35166 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
35167 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
35168 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
35169 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
35170 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
35172 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
35173 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
35174 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
35175 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
35176 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
35177 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
35178 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
35179 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
35180 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
35181 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
35182 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
35183 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
35184 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
35185 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
35186 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
35188 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
35189 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
35190 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
35191 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
35192 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
35193 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
35194 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
35196 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
35197 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
35198 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
35199 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
35200 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
35201 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
35202 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
35204 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
35205 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
35206 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
35207 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
35208 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
35209 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
35210 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
35212 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
35213 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
35214 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
35215 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
35216 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
35217 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
35218 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
35220 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
35221 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
35222 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
35223 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
35224 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
35225 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
35226 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
35228 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
35229 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
35230 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
35231 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
35232 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
35233 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
35234 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
35236 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
35237 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
35238 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
35239 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
35240 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
35241 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
35242 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
35244 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
35245 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
35246 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
35247 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
35248 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
35249 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
35250 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
35252 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
35253 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
35254 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
35255 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
35256 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
35257 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
35258 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
35259 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
35261 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
35262 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
35263 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
35264 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
35265 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
35266 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
35267 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
35268 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
35270 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
35271 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
35272 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
35273 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
35277 /* TM vector builtins. */
35279 /* Reuse the existing x86-specific `struct builtin_description' because
35280    we're lazy.  Add casts to make the entries fit.  */
/* Descriptions of the ITM (transactional memory) vector load/store/log
   builtins.  Entries are grouped by vector width and gated on the ISA
   that provides that width; the `code' field holds a generic
   BUILT_IN_TM_* enum cast into the x86 enum.  The W*/R* names map onto
   the TM store/load barrier variants (WaR = write-after-read, WaW =
   write-after-write, RaR/RaW/RfW analogously for loads).  */
35281 static const struct builtin_description bdesc_tm[] =
/* 64-bit (MMX, V2SI) barriers.  */
35283 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
35284 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
35285 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
35286 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
35287 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
35288 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
35289 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
/* 128-bit (SSE, V4SF) barriers.  */
35291 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
35292 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
35293 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
35294 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
35295 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
35296 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
35297 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
/* 256-bit (AVX, V8SF) barriers.  */
35299 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
35300 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
35301 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
35302 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
35303 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
35304 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
35305 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
/* TM logging builtins, one per vector width.  */
35307 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
35308 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
35309 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
35312 /* Initialize the transactional memory vector load/store builtins. */
/* Walk bdesc_tm and register each TM vector builtin whose ISA is
   enabled (or, for frontends that register builtins in extern scope,
   unconditionally).  Attributes are copied from the generic scalar TM
   builtins so the vector variants inherit the same TM semantics.  */
35315 ix86_init_tm_builtins (void)
35317 enum ix86_builtin_func_type ftype;
35318 const struct builtin_description *d;
35321 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
35322 tree attrs_log, attrs_type_log;
35327 /* If there are no builtins defined, we must be compiling in a
35328 language without trans-mem support.  Nothing to do in that case.  */
35329 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
35332 /* Use whatever attributes a normal TM load has.  */
35333 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
35334 attrs_load = DECL_ATTRIBUTES (decl);
35335 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
35336 /* Use whatever attributes a normal TM store has.  */
35337 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
35338 attrs_store = DECL_ATTRIBUTES (decl);
35339 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
35340 /* Use whatever attributes a normal TM log has.  */
35341 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
35342 attrs_log = DECL_ATTRIBUTES (decl);
35343 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
35345 for (i = 0, d = bdesc_tm;
35346 i < ARRAY_SIZE (bdesc_tm);
/* Register the builtin when its ISA bit is enabled, or always when the
   frontend registers builtins in extern (file) scope.  */
35349 if ((d->mask & ix86_isa_flags) != 0
35350 || (lang_hooks.builtin_function
35351 == lang_hooks.builtin_function_ext_scope))
35353 tree type, attrs, attrs_type;
35354 enum built_in_function code = (enum built_in_function) d->code;
/* d->flag encodes the ix86 function type; build the tree type.  */
35356 ftype = (enum ix86_builtin_func_type) d->flag;
35357 type = ix86_get_builtin_func_type (ftype);
/* Pick the attribute set matching the builtin's category.  */
35359 if (BUILTIN_TM_LOAD_P (code))
35361 attrs = attrs_load;
35362 attrs_type = attrs_type_load;
35364 else if (BUILTIN_TM_STORE_P (code))
35366 attrs = attrs_store;
35367 attrs_type = attrs_type_store;
35372 attrs_type = attrs_type_log;
35374 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
35375 /* The builtin without the prefix for
35376 calling it directly.  */
35377 d->name + strlen ("__builtin_"),
35379 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
35380 set the TYPE_ATTRIBUTES.  */
35381 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN)
35383 set_builtin_decl (code, decl, false);
35388 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
35389 in the current target ISA to allow the user to compile particular modules
35390 with different target specific options that differ from the command line
35393 ix86_init_mmx_sse_builtins (void)
35395 const struct builtin_description * d;
35396 enum ix86_builtin_func_type ftype;
35399 /* Add all special builtins with variable number of operands. */
35400 for (i = 0, d = bdesc_special_args;
35401 i < ARRAY_SIZE (bdesc_special_args);
35407 ftype = (enum ix86_builtin_func_type) d->flag;
35408 def_builtin (d->mask, d->name, ftype, d->code);
35411 /* Add all builtins with variable number of operands. */
35412 for (i = 0, d = bdesc_args;
35413 i < ARRAY_SIZE (bdesc_args);
35419 ftype = (enum ix86_builtin_func_type) d->flag;
35420 def_builtin_const (d->mask, d->name, ftype, d->code);
35423 /* Add all builtins with rounding. */
35424 for (i = 0, d = bdesc_round_args;
35425 i < ARRAY_SIZE (bdesc_round_args);
35431 ftype = (enum ix86_builtin_func_type) d->flag;
35432 def_builtin_const (d->mask, d->name, ftype, d->code);
35435 /* pcmpestr[im] insns. */
35436 for (i = 0, d = bdesc_pcmpestr;
35437 i < ARRAY_SIZE (bdesc_pcmpestr);
35440 if (d->code == IX86_BUILTIN_PCMPESTRM128)
35441 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
35443 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
35444 def_builtin_const (d->mask, d->name, ftype, d->code);
35447 /* pcmpistr[im] insns. */
35448 for (i = 0, d = bdesc_pcmpistr;
35449 i < ARRAY_SIZE (bdesc_pcmpistr);
35452 if (d->code == IX86_BUILTIN_PCMPISTRM128)
35453 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
35455 ftype = INT_FTYPE_V16QI_V16QI_INT;
35456 def_builtin_const (d->mask, d->name, ftype, d->code);
35459 /* comi/ucomi insns. */
35460 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
35462 if (d->mask == OPTION_MASK_ISA_SSE2)
35463 ftype = INT_FTYPE_V2DF_V2DF;
35465 ftype = INT_FTYPE_V4SF_V4SF;
35466 def_builtin_const (d->mask, d->name, ftype, d->code);
35470 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
35471 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
35472 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
35473 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
35475 /* SSE or 3DNow!A */
35476 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
35477 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
35478 IX86_BUILTIN_MASKMOVQ);
35481 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
35482 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
35484 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
35485 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
35486 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
35487 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
35490 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
35491 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
35492 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
35493 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
35496 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
35497 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
35498 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
35499 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
35500 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
35501 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
35502 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
35503 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
35504 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
35505 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
35506 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
35507 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
35510 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
35511 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
35514 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
35515 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
35516 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
35517 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
35518 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
35519 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
35520 IX86_BUILTIN_RDRAND64_STEP);
35523 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
35524 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
35525 IX86_BUILTIN_GATHERSIV2DF);
35527 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
35528 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
35529 IX86_BUILTIN_GATHERSIV4DF);
35531 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
35532 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
35533 IX86_BUILTIN_GATHERDIV2DF);
35535 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
35536 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
35537 IX86_BUILTIN_GATHERDIV4DF);
35539 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
35540 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
35541 IX86_BUILTIN_GATHERSIV4SF);
35543 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
35544 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
35545 IX86_BUILTIN_GATHERSIV8SF);
35547 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
35548 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
35549 IX86_BUILTIN_GATHERDIV4SF);
35551 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
35552 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
35553 IX86_BUILTIN_GATHERDIV8SF);
35555 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
35556 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
35557 IX86_BUILTIN_GATHERSIV2DI);
35559 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
35560 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
35561 IX86_BUILTIN_GATHERSIV4DI);
35563 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
35564 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
35565 IX86_BUILTIN_GATHERDIV2DI);
35567 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
35568 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
35569 IX86_BUILTIN_GATHERDIV4DI);
35571 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
35572 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
35573 IX86_BUILTIN_GATHERSIV4SI);
35575 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
35576 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
35577 IX86_BUILTIN_GATHERSIV8SI);
35579 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
35580 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
35581 IX86_BUILTIN_GATHERDIV4SI);
35583 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
35584 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
35585 IX86_BUILTIN_GATHERDIV8SI);
35587 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
35588 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
35589 IX86_BUILTIN_GATHERALTSIV4DF);
35591 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
35592 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
35593 IX86_BUILTIN_GATHERALTDIV8SF);
35595 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
35596 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
35597 IX86_BUILTIN_GATHERALTSIV4DI);
35599 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
35600 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
35601 IX86_BUILTIN_GATHERALTDIV8SI);
35604 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
35605 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
35606 IX86_BUILTIN_GATHER3SIV16SF);
35608 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
35609 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
35610 IX86_BUILTIN_GATHER3SIV8DF);
35612 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
35613 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
35614 IX86_BUILTIN_GATHER3DIV16SF);
35616 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
35617 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
35618 IX86_BUILTIN_GATHER3DIV8DF);
35620 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
35621 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
35622 IX86_BUILTIN_GATHER3SIV16SI);
35624 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
35625 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
35626 IX86_BUILTIN_GATHER3SIV8DI);
35628 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
35629 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
35630 IX86_BUILTIN_GATHER3DIV16SI);
35632 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
35633 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
35634 IX86_BUILTIN_GATHER3DIV8DI);
35636 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
35637 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
35638 IX86_BUILTIN_GATHER3ALTSIV8DF);
35640 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
35641 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
35642 IX86_BUILTIN_GATHER3ALTDIV16SF);
35644 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
35645 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
35646 IX86_BUILTIN_GATHER3ALTSIV8DI);
35648 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
35649 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
35650 IX86_BUILTIN_GATHER3ALTDIV16SI);
35652 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
35653 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
35654 IX86_BUILTIN_SCATTERSIV16SF);
35656 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
35657 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
35658 IX86_BUILTIN_SCATTERSIV8DF);
35660 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
35661 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
35662 IX86_BUILTIN_SCATTERDIV16SF);
35664 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
35665 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
35666 IX86_BUILTIN_SCATTERDIV8DF);
35668 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
35669 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
35670 IX86_BUILTIN_SCATTERSIV16SI);
35672 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
35673 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
35674 IX86_BUILTIN_SCATTERSIV8DI);
35676 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
35677 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
35678 IX86_BUILTIN_SCATTERDIV16SI);
35680 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
35681 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
35682 IX86_BUILTIN_SCATTERDIV8DI);
35685 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
35686 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
35687 IX86_BUILTIN_GATHER3SIV2DF);
35689 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
35690 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
35691 IX86_BUILTIN_GATHER3SIV4DF);
35693 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
35694 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
35695 IX86_BUILTIN_GATHER3DIV2DF);
35697 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
35698 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
35699 IX86_BUILTIN_GATHER3DIV4DF);
35701 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
35702 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
35703 IX86_BUILTIN_GATHER3SIV4SF);
35705 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
35706 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
35707 IX86_BUILTIN_GATHER3SIV8SF);
35709 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
35710 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
35711 IX86_BUILTIN_GATHER3DIV4SF);
35713 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
35714 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
35715 IX86_BUILTIN_GATHER3DIV8SF);
35717 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
35718 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
35719 IX86_BUILTIN_GATHER3SIV2DI);
35721 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
35722 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
35723 IX86_BUILTIN_GATHER3SIV4DI);
35725 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
35726 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
35727 IX86_BUILTIN_GATHER3DIV2DI);
35729 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
35730 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
35731 IX86_BUILTIN_GATHER3DIV4DI);
35733 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
35734 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
35735 IX86_BUILTIN_GATHER3SIV4SI);
35737 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
35738 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
35739 IX86_BUILTIN_GATHER3SIV8SI);
35741 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
35742 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
35743 IX86_BUILTIN_GATHER3DIV4SI);
35745 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
35746 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
35747 IX86_BUILTIN_GATHER3DIV8SI);
35749 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
35750 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
35751 IX86_BUILTIN_GATHER3ALTSIV4DF);
35753 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
35754 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
35755 IX86_BUILTIN_GATHER3ALTDIV8SF);
35757 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
35758 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
35759 IX86_BUILTIN_GATHER3ALTSIV4DI);
35761 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
35762 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
35763 IX86_BUILTIN_GATHER3ALTDIV8SI);
35765 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
35766 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
35767 IX86_BUILTIN_SCATTERSIV8SF);
35769 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
35770 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
35771 IX86_BUILTIN_SCATTERSIV4SF);
35773 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
35774 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
35775 IX86_BUILTIN_SCATTERSIV4DF);
35777 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
35778 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
35779 IX86_BUILTIN_SCATTERSIV2DF);
35781 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
35782 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
35783 IX86_BUILTIN_SCATTERDIV8SF);
35785 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
35786 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
35787 IX86_BUILTIN_SCATTERDIV4SF);
35789 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
35790 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
35791 IX86_BUILTIN_SCATTERDIV4DF);
35793 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
35794 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
35795 IX86_BUILTIN_SCATTERDIV2DF);
35797 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
35798 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
35799 IX86_BUILTIN_SCATTERSIV8SI);
35801 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
35802 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
35803 IX86_BUILTIN_SCATTERSIV4SI);
35805 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
35806 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
35807 IX86_BUILTIN_SCATTERSIV4DI);
35809 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
35810 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
35811 IX86_BUILTIN_SCATTERSIV2DI);
35813 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
35814 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
35815 IX86_BUILTIN_SCATTERDIV8SI);
35817 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
35818 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
35819 IX86_BUILTIN_SCATTERDIV4SI);
35821 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
35822 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
35823 IX86_BUILTIN_SCATTERDIV4DI);
35825 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
35826 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
35827 IX86_BUILTIN_SCATTERDIV2DI);
35828 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8df ",
35829 VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT,
35830 IX86_BUILTIN_SCATTERALTSIV8DF);
35832 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8sf ",
35833 VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT,
35834 IX86_BUILTIN_SCATTERALTDIV16SF);
35836 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8di ",
35837 VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT,
35838 IX86_BUILTIN_SCATTERALTSIV8DI);
35840 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8si ",
35841 VOID_FTYPE_PINT_HI_V8DI_V16SI_INT,
35842 IX86_BUILTIN_SCATTERALTDIV16SI);
35845 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
35846 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
35847 IX86_BUILTIN_GATHERPFDPD);
35848 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
35849 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
35850 IX86_BUILTIN_GATHERPFDPS);
35851 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
35852 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
35853 IX86_BUILTIN_GATHERPFQPD);
35854 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
35855 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
35856 IX86_BUILTIN_GATHERPFQPS);
35857 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
35858 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
35859 IX86_BUILTIN_SCATTERPFDPD);
35860 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
35861 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
35862 IX86_BUILTIN_SCATTERPFDPS);
35863 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
35864 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
35865 IX86_BUILTIN_SCATTERPFQPD);
35866 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
35867 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
35868 IX86_BUILTIN_SCATTERPFQPS);
35871 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
35872 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
35873 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
35874 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
35875 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
35876 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
35877 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
35878 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
35879 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
35880 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
35881 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
35882 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
35883 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
35884 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
35887 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
35888 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
35890 /* MMX access to the vec_init patterns. */
35891 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
35892 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
35894 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
35895 V4HI_FTYPE_HI_HI_HI_HI,
35896 IX86_BUILTIN_VEC_INIT_V4HI);
35898 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
35899 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
35900 IX86_BUILTIN_VEC_INIT_V8QI);
35902 /* Access to the vec_extract patterns. */
35903 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
35904 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
35905 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
35906 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
35907 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
35908 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
35909 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
35910 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
35911 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
35912 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
35914 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
35915 "__builtin_ia32_vec_ext_v4hi",
35916 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
35918 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
35919 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
35921 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
35922 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
35924 /* Access to the vec_set patterns. */
35925 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
35926 "__builtin_ia32_vec_set_v2di",
35927 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
35929 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
35930 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
35932 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
35933 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
35935 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
35936 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
35938 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
35939 "__builtin_ia32_vec_set_v4hi",
35940 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
35942 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
35943 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
35946 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
35947 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
35948 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
35949 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
35950 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
35951 "__builtin_ia32_rdseed_di_step",
35952 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
35955 def_builtin (0, "__builtin_ia32_addcarryx_u32",
35956 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
35957 def_builtin (OPTION_MASK_ISA_64BIT,
35958 "__builtin_ia32_addcarryx_u64",
35959 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
35960 IX86_BUILTIN_ADDCARRYX64);
35963 def_builtin (0, "__builtin_ia32_sbb_u32",
35964 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
35965 def_builtin (OPTION_MASK_ISA_64BIT,
35966 "__builtin_ia32_sbb_u64",
35967 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
35968 IX86_BUILTIN_SBB64);
35970 /* Read/write FLAGS. */
35971 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
35972 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
35973 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
35974 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
35975 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
35976 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
35977 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
35978 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
35981 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
35982 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
35985 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
35986 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
35988 /* MONITORX and MWAITX. */
35989 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
35990 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
35991 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
35992 VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
35995 def_builtin (OPTION_MASK_ISA_CLZERO, "__builtin_ia32_clzero",
35996 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLZERO);
35998 /* Add FMA4 multi-arg argument instructions */
35999 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36004 ftype = (enum ix86_builtin_func_type) d->flag;
36005 def_builtin_const (d->mask, d->name, ftype, d->code);
/* Register the Intel MPX (Memory Protection Extensions) builtins described
   in the bdesc_mpx and bdesc_mpx_const tables.  Each successfully created
   decl is marked "leaf" and nothrow (rationale in the comment below); when
   no decl is created, the flags are recorded in ix86_builtins_isa instead
   -- presumably so a later, deferred creation picks them up (TODO confirm
   against def_builtin).  */
36010 ix86_init_mpx_builtins ()
36012 const struct builtin_description * d;
36013 enum ix86_builtin_func_type ftype;
/* First table: the ordinary (non-const) MPX builtins.  */
36017 for (i = 0, d = bdesc_mpx;
36018 i < ARRAY_SIZE (bdesc_mpx);
36024 ftype = (enum ix86_builtin_func_type) d->flag;
36025 decl = def_builtin (d->mask, d->name, ftype, d->code);
36027 /* With no leaf and nothrow flags for MPX builtins
36028 abnormal edges may follow its call when setjmp
36029 presents in the function.  Since we may have a lot
36030 of MPX builtins calls it causes lots of useless
36031 edges and enormous PHI nodes.  To avoid this we mark
36032 MPX builtins as leaf and nothrow. */
36035 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
36037 TREE_NOTHROW (decl) = 1;
/* No decl available: remember leaf/nothrow per builtin code.  */
36041 ix86_builtins_isa[(int)d->code].leaf_p = true;
36042 ix86_builtins_isa[(int)d->code].nothrow_p = true;
/* Second table: const MPX builtins, same leaf/nothrow treatment.  */
36046 for (i = 0, d = bdesc_mpx_const;
36047 i < ARRAY_SIZE (bdesc_mpx_const);
36053 ftype = (enum ix86_builtin_func_type) d->flag;
36054 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
36058 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
36060 TREE_NOTHROW (decl) = 1;
36064 ix86_builtins_isa[(int)d->code].leaf_p = true;
36065 ix86_builtins_isa[(int)d->code].nothrow_p = true;
36070 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
36071 to return a pointer to VERSION_DECL if the outcome of the expression
36072 formed by PREDICATE_CHAIN is true. This function will be called during
36073 version dispatch to decide which function version to execute. It returns
36074 the basic block at the end, to which more conditions can be added. */
36077 add_condition_to_bb (tree function_decl, tree version_decl,
36078 tree predicate_chain, basic_block new_bb)
36080 gimple *return_stmt;
36081 tree convert_expr, result_var;
36082 gimple *convert_stmt;
36083 gimple *call_cond_stmt;
36084 gimple *if_else_stmt;
36086 basic_block bb1, bb2, bb3;
36089 tree cond_var, and_expr_var = NULL_TREE;
36092 tree predicate_decl, predicate_arg;
/* Work inside the dispatcher function's cfun while building GIMPLE.  */
36094 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
36096 gcc_assert (new_bb != NULL);
36097 gseq = bb_seq (new_bb);
/* Build "result_var = (void *) &version_decl; return result_var;" --
   the value an IFUNC resolver returns for the chosen version.  */
36100 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
36101 build_fold_addr_expr (version_decl));
36102 result_var = create_tmp_var (ptr_type_node);
36103 convert_stmt = gimple_build_assign (result_var, convert_expr);
36104 return_stmt = gimple_build_return (result_var);
/* No predicates: this is the unconditional (default) return.  */
36106 if (predicate_chain == NULL_TREE)
36108 gimple_seq_add_stmt (&gseq, convert_stmt);
36109 gimple_seq_add_stmt (&gseq, return_stmt);
36110 set_bb_seq (new_bb, gseq);
36111 gimple_set_bb (convert_stmt, new_bb);
36112 gimple_set_bb (return_stmt, new_bb);
/* Emit one call per (predicate-fn . arg) pair and AND the integer
   results together via MIN_EXPR in and_expr_var.  */
36117 while (predicate_chain != NULL)
36119 cond_var = create_tmp_var (integer_type_node);
36120 predicate_decl = TREE_PURPOSE (predicate_chain);
36121 predicate_arg = TREE_VALUE (predicate_chain);
36122 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
36123 gimple_call_set_lhs (call_cond_stmt, cond_var);
36125 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
36126 gimple_set_bb (call_cond_stmt, new_bb);
36127 gimple_seq_add_stmt (&gseq, call_cond_stmt);
36129 predicate_chain = TREE_CHAIN (predicate_chain);
36131 if (and_expr_var == NULL)
36132 and_expr_var = cond_var;
36135 gimple *assign_stmt;
36136 /* Use MIN_EXPR to check if any integer is zero?.
36137 and_expr_var = min_expr <cond_var, and_expr_var> */
36138 assign_stmt = gimple_build_assign (and_expr_var,
36139 build2 (MIN_EXPR, integer_type_node,
36140 cond_var, and_expr_var));
36142 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
36143 gimple_set_bb (assign_stmt, new_bb);
36144 gimple_seq_add_stmt (&gseq, assign_stmt);
/* "if (and_expr_var > 0)" guards the return of this version.  */
36148 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
36150 NULL_TREE, NULL_TREE);
36151 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
36152 gimple_set_bb (if_else_stmt, new_bb);
36153 gimple_seq_add_stmt (&gseq, if_else_stmt);
36155 gimple_seq_add_stmt (&gseq, convert_stmt);
36156 gimple_seq_add_stmt (&gseq, return_stmt);
36157 set_bb_seq (new_bb, gseq);
/* Split into bb1 (condition), bb2 (return of this version) and bb3
   (fall-through where further conditions can be appended).  */
36160 e12 = split_block (bb1, if_else_stmt);
36162 e12->flags &= ~EDGE_FALLTHRU;
36163 e12->flags |= EDGE_TRUE_VALUE;
36165 e23 = split_block (bb2, return_stmt);
36167 gimple_set_bb (convert_stmt, bb2);
36168 gimple_set_bb (return_stmt, bb2);
/* Condition false: skip this version's return and try the next one.  */
36171 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
/* The return block exits the function.  */
36174 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
36181 /* This parses the attribute arguments to target in DECL and determines
36182 the right builtin to use to match the platform specification.
36183 It returns the priority value for this version decl. If PREDICATE_LIST
36184 is not NULL, it stores the list of cpu features that need to be checked
36185 before dispatching this function. */
36187 static unsigned int
36188 get_builtin_code_for_version (tree decl, tree *predicate_list)
36191 struct cl_target_option cur_target;
36193 struct cl_target_option *new_target;
36194 const char *arg_str = NULL;
36195 const char *attrs_str = NULL;
36196 char *tok_str = NULL;
36199 /* Priority of i386 features, greater value is higher priority.  This is
36200 used to decide the order in which function dispatch must happen.  For
36201 instance, a version specialized for SSE4.2 should be checked for dispatch
36202 before a version for SSE3, as SSE4.2 implies SSE3. */
36203 enum feature_priority
36236 enum feature_priority priority = P_ZERO;
36238 /* These are the target attribute strings for which a dispatcher is
36239 available, from fold_builtin_cpu. */
36241 static struct _feature_list
36243 const char *const name;
36244 const enum feature_priority priority;
36246 const feature_list[] =
36252 {"sse4a", P_SSE4_A},
36253 {"ssse3", P_SSSE3},
36254 {"sse4.1", P_SSE4_1},
36255 {"sse4.2", P_SSE4_2},
36256 {"popcnt", P_POPCNT},
36258 {"pclmul", P_PCLMUL},
36266 {"avx512f", P_AVX512F}
36270 static unsigned int NUM_FEATURES
36271 = sizeof (feature_list) / sizeof (struct _feature_list);
36275 tree predicate_chain = NULL_TREE;
36276 tree predicate_decl, predicate_arg;
/* The "target" attribute must be present on a versioned function; its
   value is the attribute string we parse below.  */
36278 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
36279 gcc_assert (attrs != NULL);
36281 attrs = TREE_VALUE (TREE_VALUE (attrs));
36283 gcc_assert (TREE_CODE (attrs) == STRING_CST);
36284 attrs_str = TREE_STRING_POINTER (attrs);
36286 /* Return priority zero for default function. */
36287 if (strcmp (attrs_str, "default") == 0)
36290 /* Handle arch= if specified.  For priority, set it to be 1 more than
36291 the best instruction set the processor can handle.  For instance, if
36292 there is a version for atom and a version for ssse3 (the highest ISA
36293 priority for atom), the atom version must be checked for dispatch
36294 before the ssse3 version. */
36295 if (strstr (attrs_str, "arch=") != NULL)
/* Temporarily apply the attribute to global_options so the selected
   arch and ISA flags can be inspected; restored further down.  */
36297 cl_target_option_save (&cur_target, &global_options);
36298 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
36299 &global_options_set);
36301 gcc_assert (target_node);
36302 new_target = TREE_TARGET_OPTION (target_node);
36303 gcc_assert (new_target);
/* Map the processor to the string __builtin_cpu_is understands and to
   the dispatch priority of its best ISA.  */
36305 if (new_target->arch_specified && new_target->arch > 0)
36307 switch (new_target->arch)
36309 case PROCESSOR_CORE2:
36311 priority = P_PROC_SSSE3;
36313 case PROCESSOR_NEHALEM:
36314 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
36315 arg_str = "westmere";
36317 /* We translate "arch=corei7" and "arch=nehalem" to
36318 "corei7" so that it will be mapped to M_INTEL_COREI7
36319 as cpu type to cover all M_INTEL_COREI7_XXXs. */
36320 arg_str = "corei7";
36321 priority = P_PROC_SSE4_2;
36323 case PROCESSOR_SANDYBRIDGE:
36324 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
36325 arg_str = "ivybridge";
36327 arg_str = "sandybridge";
36328 priority = P_PROC_AVX;
/* Distinguish the Haswell-derived cores by ISA flags only they have.  */
36330 case PROCESSOR_HASWELL:
36331 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
36332 arg_str = "skylake-avx512";
36333 else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_XSAVES)
36334 arg_str = "skylake";
36335 else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
36336 arg_str = "broadwell";
36338 arg_str = "haswell";
36339 priority = P_PROC_AVX2;
36341 case PROCESSOR_BONNELL:
36342 arg_str = "bonnell";
36343 priority = P_PROC_SSSE3;
36345 case PROCESSOR_KNL:
36347 priority = P_PROC_AVX512F;
36349 case PROCESSOR_SILVERMONT:
36350 arg_str = "silvermont";
36351 priority = P_PROC_SSE4_2;
36353 case PROCESSOR_AMDFAM10:
36354 arg_str = "amdfam10h";
36355 priority = P_PROC_SSE4_A;
36357 case PROCESSOR_BTVER1:
36358 arg_str = "btver1";
36359 priority = P_PROC_SSE4_A;
36361 case PROCESSOR_BTVER2:
36362 arg_str = "btver2";
36363 priority = P_PROC_BMI;
36365 case PROCESSOR_BDVER1:
36366 arg_str = "bdver1";
36367 priority = P_PROC_XOP;
36369 case PROCESSOR_BDVER2:
36370 arg_str = "bdver2";
36371 priority = P_PROC_FMA;
36373 case PROCESSOR_BDVER3:
36374 arg_str = "bdver3";
36375 priority = P_PROC_FMA;
36377 case PROCESSOR_BDVER4:
36378 arg_str = "bdver4";
36379 priority = P_PROC_AVX2;
36381 case PROCESSOR_ZNVER1:
36382 arg_str = "znver1";
36383 priority = P_PROC_AVX2;
/* Undo the temporary option change made for the inspection above.  */
36388 cl_target_option_restore (&global_options, &cur_target);
36390 if (predicate_list && arg_str == NULL)
36392 error_at (DECL_SOURCE_LOCATION (decl),
36393 "No dispatcher found for the versioning attributes");
36397 if (predicate_list)
36399 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
36400 /* For a C string literal the length includes the trailing NULL. */
36401 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
/* Chain entries are (predicate-fn . string-arg) pairs consumed by
   add_condition_to_bb via TREE_PURPOSE/TREE_VALUE.  */
36402 predicate_chain = tree_cons (predicate_decl, predicate_arg,
/* Process feature names: tokenize the attribute string on ','.
   NOTE(review): strtok mutates tok_str and keeps static state --
   presumably fine as this runs single-threaded inside the compiler.  */
36407 /* Process feature name. */
36408 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
36409 strcpy (tok_str, attrs_str);
36410 token = strtok (tok_str, ",");
36411 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
36413 while (token != NULL)
36415 /* Do not process "arch="  */
36416 if (strncmp (token, "arch=", 5) == 0)
36418 token = strtok (NULL, ",");
/* Look the token up in the dispatchable-feature table.  */
36421 for (i = 0; i < NUM_FEATURES; ++i)
36423 if (strcmp (token, feature_list[i].name) == 0)
36425 if (predicate_list)
36427 predicate_arg = build_string_literal (
36428 strlen (feature_list[i].name) + 1,
36429 feature_list[i].name);
36430 predicate_chain = tree_cons (predicate_decl, predicate_arg,
36433 /* Find the maximum priority feature. */
36434 if (feature_list[i].priority > priority)
36435 priority = feature_list[i].priority;
/* Loop fell off the end of the table: unknown feature name.  */
36440 if (predicate_list && i == NUM_FEATURES)
36442 error_at (DECL_SOURCE_LOCATION (decl),
36443 "No dispatcher found for %s", token);
36446 token = strtok (NULL, ",");
36450 if (predicate_list && predicate_chain == NULL_TREE)
36452 error_at (DECL_SOURCE_LOCATION (decl),
36453 "No dispatcher found for the versioning attributes : %s",
36457 else if (predicate_list)
/* tree_cons built the chain backwards; restore source order.  */
36459 predicate_chain = nreverse (predicate_chain);
36460 *predicate_list = predicate_chain;
36466 /* This compares the priority of target features in function DECL1
36467 and DECL2. It returns positive value if DECL1 is higher priority,
36468 negative value if DECL2 is higher priority and 0 if they are the
36472 ix86_compare_version_priority (tree decl1, tree decl2)
36474 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
36475 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
36477 return (int)priority1 - (int)priority2;
36480 /* V1 and V2 point to function versions with different priorities
36481 based on the target ISA. This function compares their priorities. */
36484 feature_compare (const void *v1, const void *v2)
36486 typedef struct _function_version_info
36489 tree predicate_chain;
36490 unsigned int dispatch_priority;
36491 } function_version_info;
36493 const function_version_info c1 = *(const function_version_info *)v1;
36494 const function_version_info c2 = *(const function_version_info *)v2;
36495 return (c2.dispatch_priority - c1.dispatch_priority);
36498 /* This function generates the dispatch function for
36499    multi-versioned functions. DISPATCH_DECL is the function which will
36500    contain the dispatch logic. FNDECLS are the function choices for
36501    dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
36502    in DISPATCH_DECL in which the dispatch code is generated. */
36505 dispatch_function_versions (tree dispatch_decl,
36507 			    basic_block *empty_bb)
36510   gimple *ifunc_cpu_init_stmt;
36514   vec<tree> *fndecls;
36515   unsigned int num_versions = 0;
36516   unsigned int actual_versions = 0;
/* Scratch record per non-default version: decl, its feature-predicate
   chain, and the ISA-derived dispatch priority used for sorting.  */
36519   struct _function_version_info
36522       tree predicate_chain;
36523       unsigned int dispatch_priority;
36524     }*function_version_info;
36526   gcc_assert (dispatch_decl != NULL
36527 	      && fndecls_p != NULL
36528 	      && empty_bb != NULL);
36530   /*fndecls_p is actually a vector. */
36531   fndecls = static_cast<vec<tree> *> (fndecls_p);
36533   /* At least one more version other than the default. */
36534   num_versions = fndecls->length ();
36535   gcc_assert (num_versions >= 2);
/* One slot per non-default version; freed at the end of this function.  */
36537   function_version_info = (struct _function_version_info *)
36538     XNEWVEC (struct _function_version_info, (num_versions - 1));
36540   /* The first version in the vector is the default decl. */
36541   default_decl = (*fndecls)[0];
36543   push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
36545   gseq = bb_seq (*empty_bb);
36546   /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
36547      constructors, so explicity call __builtin_cpu_init here. */
36548   ifunc_cpu_init_stmt = gimple_build_call_vec (
36549                      ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
36550   gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
36551   gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
36552   set_bb_seq (*empty_bb, gseq);
/* Collect every non-default version (index 1 onwards) together with its
   predicate chain and priority.  */
36557   for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
36559       tree version_decl = ele;
36560       tree predicate_chain = NULL_TREE;
36561       unsigned int priority;
36562       /* Get attribute string, parse it and find the right predicate decl.
36563          The predicate function could be a lengthy combination of many
36564 	 features, like arch-type and various isa-variants. */
36565       priority = get_builtin_code_for_version (version_decl,
36568       if (predicate_chain == NULL_TREE)
36571       function_version_info [actual_versions].version_decl = version_decl;
36572       function_version_info [actual_versions].predicate_chain
36574       function_version_info [actual_versions].dispatch_priority = priority;
36578   /* Sort the versions according to descending order of dispatch priority.  The
36579      priority is based on the ISA.  This is not a perfect solution.  There
36580      could still be ambiguity.  If more than one function version is suitable
36581      to execute,  which one should be dispatched?  In future, allow the user
36582      to specify a dispatch  priority next to the version. */
36583   qsort (function_version_info, actual_versions,
36584          sizeof (struct _function_version_info), feature_compare);
/* Emit one conditional dispatch per version, highest priority first.  */
36586   for  (i = 0; i < actual_versions; ++i)
36587     *empty_bb = add_condition_to_bb (dispatch_decl,
36588 				     function_version_info[i].version_decl,
36589 				     function_version_info[i].predicate_chain,
36592   /* dispatch default version at the end. */
36593   *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
36596   free (function_version_info);
36600 /* Comparator function to be used in qsort routine to sort attribute
36601    specification strings to "target". */
36604 attr_strcmp (const void *v1, const void *v2)
/* qsort hands us pointers to the array elements, which are char*.  */
36606   const char *c1 = *(char *const*)v1;
36607   const char *c2 = *(char *const*)v2;
36608   return strcmp (c1, c2);
36611 /* ARGLIST is the argument to target attribute.  This function tokenizes
36612    the comma separated arguments, sorts them and returns a string which
36613    is a unique identifier for the comma separated arguments.   It also
36614    replaces non-identifier characters "=,-" with "_". */
36617 sorted_attr_string (tree arglist)
36620   size_t str_len_sum = 0;
36621   char **args = NULL;
36622   char *attr_str, *ret_str;
36624   unsigned int argnum = 1;
/* First pass: total up string lengths (plus separators) so both
   buffers below can be sized exactly once.  */
36627   for (arg = arglist; arg; arg = TREE_CHAIN (arg))
36629       const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
36630       size_t len = strlen (str);
36631       str_len_sum += len + 1;
36632       if (arg != arglist)
36634       for (i = 0; i < strlen (str); i++)
36639   attr_str = XNEWVEC (char, str_len_sum);
/* Second pass: concatenate all attribute strings, comma-separated,
   into ATTR_STR.  STR_LEN_SUM is reused as the running write offset.  */
36641   for (arg = arglist; arg; arg = TREE_CHAIN (arg))
36643       const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
36644       size_t len = strlen (str);
36645       memcpy (attr_str + str_len_sum, str, len);
36646       attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
36647       str_len_sum += len + 1;
36650   /* Replace "=,-" with "_". */
36651   for (i = 0; i < strlen (attr_str); i++)
36652     if (attr_str[i] == '=' || attr_str[i]== '-')
36658   args = XNEWVEC (char *, argnum);
/* NOTE(review): strtok mutates ATTR_STR and keeps static state, so this
   routine is not reentrant/thread-safe; acceptable in single-threaded
   compiler context, but strtok_r would be the safer idiom.  */
36661   attr = strtok (attr_str, ",");
36662   while (attr != NULL)
36666       attr = strtok (NULL, ",");
36669   qsort (args, argnum, sizeof (char *), attr_strcmp);
36671   ret_str = XNEWVEC (char, str_len_sum);
/* Rebuild the sorted tokens into RET_STR, joined with '_'.  */
36673   for (i = 0; i < argnum; i++)
36675       size_t len = strlen (args[i]);
36676       memcpy (ret_str + str_len_sum, args[i], len);
36677       ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
36678       str_len_sum += len + 1;
36682   XDELETEVEC (attr_str);
36686 /* This function changes the assembler name for functions that are
36687    versions.  If DECL is a function version and has a "target"
36688    attribute, it appends the attribute string to its assembler name.  */
36691 ix86_mangle_function_version_assembler_name (tree decl, tree id)
36694   const char *orig_name, *version_string;
36695   char *attr_str, *assembler_name;
/* gnu_inline versions would never have a body emitted, which defeats
   versioning, so diagnose the combination.  */
36697   if (DECL_DECLARED_INLINE_P (decl)
36698       && lookup_attribute ("gnu_inline",
36699 			   DECL_ATTRIBUTES (decl)))
36700     error_at (DECL_SOURCE_LOCATION (decl),
36701 	      "Function versions cannot be marked as gnu_inline,"
36702 	      " bodies have to be generated");
36704   if (DECL_VIRTUAL_P (decl)
36705       || DECL_VINDEX (decl))
36706     sorry ("Virtual function multiversioning not supported");
36708   version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
36710   /* target attribute string cannot be NULL.  */
36711   gcc_assert (version_attr != NULL_TREE);
36713   orig_name = IDENTIFIER_POINTER (id);
36715     = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
/* The default version keeps the unmangled name.  */
36717   if (strcmp (version_string, "default") == 0)
36720   attr_str = sorted_attr_string (TREE_VALUE (version_attr));
/* "+2" covers the joining '.' and the trailing NUL.  */
36721   assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
36723   sprintf (assembler_name, "%s.%s", orig_name, attr_str);
36725   /* Allow assembler name to be modified if already set. */
36726   if (DECL_ASSEMBLER_NAME_SET_P (decl))
36727     SET_DECL_RTL (decl, NULL);
36729   tree ret = get_identifier (assembler_name);
36730   XDELETEVEC (attr_str);
36731   XDELETEVEC (assembler_name);
36735 /* This function returns true if FN1 and FN2 are versions of the same function,
36736    that is, the target strings of the function decls are different.  This assumes
36737    that FN1 and FN2 have the same signature.  */
36740 ix86_function_versions (tree fn1, tree fn2)
36743   char *target1, *target2;
36746   if (TREE_CODE (fn1) != FUNCTION_DECL
36747       || TREE_CODE (fn2) != FUNCTION_DECL)
36750   attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
36751   attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
36753   /* At least one function decl should have the target attribute specified.  */
36754   if (attr1 == NULL_TREE && attr2 == NULL_TREE)
36757   /* Diagnose missing target attribute if one of the decls is already
36758      multi-versioned.  */
36759   if (attr1 == NULL_TREE || attr2 == NULL_TREE)
36761       if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
/* Normalize so FN2 is always the decl that lacks the attribute.  */
36763 	  if (attr2 != NULL_TREE)
36765 	      std::swap (fn1, fn2);
36768 	  error_at (DECL_SOURCE_LOCATION (fn2),
36769 		    "missing %<target%> attribute for multi-versioned %D",
36771 	  inform (DECL_SOURCE_LOCATION (fn1),
36772 		  "previous declaration of %D", fn1);
36773 	  /* Prevent diagnosing of the same error multiple times.  */
36774 	  DECL_ATTRIBUTES (fn2)
36775 	    = tree_cons (get_identifier ("target"),
36776 			 copy_node (TREE_VALUE (attr1)),
36777 			 DECL_ATTRIBUTES (fn2));
/* Compare canonicalized (sorted) attribute strings so that different
   orderings of the same feature list compare equal.  */
36782   target1 = sorted_attr_string (TREE_VALUE (attr1));
36783   target2 = sorted_attr_string (TREE_VALUE (attr2));
36785   /* The sorted target strings must be different for fn1 and fn2
36787   if (strcmp (target1, target2) == 0)
36792   XDELETEVEC (target1);
36793   XDELETEVEC (target2);
/* Target hook: compute the assembler name for DECL starting from ID,
   adding the version suffix for multi-versioned functions and then
   letting the subtarget refine it further.  */
36799 ix86_mangle_decl_assembler_name (tree decl, tree id)
36801   /* For function version, add the target suffix to the assembler name.  */
36802   if (TREE_CODE (decl) == FUNCTION_DECL
36803       && DECL_FUNCTION_VERSIONED (decl))
36804     id = ix86_mangle_function_version_assembler_name (decl, id);
36805 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
36806   id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
36812 /* Return a new name by appending SUFFIX to the DECL name.  If make_unique
36813    is true, append the full path name of the source file.  */
36816 make_name (tree decl, const char *suffix, bool make_unique)
36818   char *global_var_name;
36821   const char *unique_name = NULL;
36823   name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
36825   /* Get a unique name that can be used globally without any chances
36826      of collision at link time.  */
36828     unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
/* "+2" accounts for the joining '.' and the trailing NUL; the unique
   part, when present, adds its own length plus one more '.'.  */
36830   name_len = strlen (name) + strlen (suffix) + 2;
36833     name_len += strlen (unique_name) + 1;
36834   global_var_name = XNEWVEC (char, name_len);
36836   /* Use '.' to concatenate names as it is demangler friendly.  */
36838     snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
36841     snprintf (global_var_name, name_len, "%s.%s", name, suffix);
/* Caller owns the returned buffer and must XDELETEVEC it.  */
36843   return global_var_name;
36846 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
36848 /* Make a dispatcher declaration for the multi-versioned function DECL.
36849    Calls to DECL function will be replaced with calls to the dispatcher
36850    by the front-end.  Return the decl created.  */
36853 make_dispatcher_decl (const tree decl)
36857   tree fn_type, func_type;
36858   bool is_uniq = false;
/* Non-public versions need a unique ifunc name to avoid link-time
   collisions with other translation units.  */
36860   if (TREE_PUBLIC (decl) == 0)
36863   func_name = make_name (decl, "ifunc", is_uniq);
36865   fn_type = TREE_TYPE (decl);
36866   func_type = build_function_type (TREE_TYPE (fn_type),
36867 				   TYPE_ARG_TYPES (fn_type));
36869   func_decl = build_fn_decl (func_name, func_type);
36870   XDELETEVEC (func_name);
36871   TREE_USED (func_decl) = 1;
36872   DECL_CONTEXT (func_decl) = NULL_TREE;
36873   DECL_INITIAL (func_decl) = error_mark_node;
36874   DECL_ARTIFICIAL (func_decl) = 1;
36875   /* Mark this func as external, the resolver will flip it again if
36876      it gets generated.  */
36877   DECL_EXTERNAL (func_decl) = 1;
36878   /* This will be of type IFUNCs have to be externally visible.  */
36879   TREE_PUBLIC (func_decl) = 1;
36886 /* Returns true if decl is multi-versioned and DECL is the default function,
36887    that is it is not tagged with target specific optimization.  */
36890 is_function_default_version (const tree decl)
36892   if (TREE_CODE (decl) != FUNCTION_DECL
36893       || !DECL_FUNCTION_VERSIONED (decl))
36895   tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
/* The default version is spelled target("default").  */
36897   attr = TREE_VALUE (TREE_VALUE (attr));
36898   return (TREE_CODE (attr) == STRING_CST
36899 	  && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
36902 /* Make a dispatcher declaration for the multi-versioned function DECL.
36903    Calls to DECL function will be replaced with calls to the dispatcher
36904    by the front-end.  Returns the decl of the dispatcher function.  */
36907 ix86_get_function_versions_dispatcher (void *decl)
36909   tree fn = (tree) decl;
36910   struct cgraph_node *node = NULL;
36911   struct cgraph_node *default_node = NULL;
36912   struct cgraph_function_version_info *node_v = NULL;
36913   struct cgraph_function_version_info *first_v = NULL;
36915   tree dispatch_decl = NULL;
36917   struct cgraph_function_version_info *default_version_info = NULL;
36919   gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
36921   node = cgraph_node::get (fn);
36922   gcc_assert (node != NULL);
36924   node_v = node->function_version ();
36925   gcc_assert (node_v != NULL);
/* Dispatcher already built for some version in this chain -- reuse it.  */
36927   if (node_v->dispatcher_resolver != NULL)
36928     return node_v->dispatcher_resolver;
36930   /* Find the default version and make it the first node.  */
36932   /* Go to the beginning of the chain.  */
36933   while (first_v->prev != NULL)
36934     first_v = first_v->prev;
36935   default_version_info = first_v;
36936   while (default_version_info != NULL)
36938       if (is_function_default_version
36939 	    (default_version_info->this_node->decl))
36941       default_version_info = default_version_info->next;
36944   /* If there is no default node, just return NULL.  */
36945   if (default_version_info == NULL)
36948   /* Make default info the first node.  */
/* Standard doubly-linked-list splice: unlink the default entry, then
   push it to the front of the chain.  */
36949   if (first_v != default_version_info)
36951       default_version_info->prev->next = default_version_info->next;
36952       if (default_version_info->next)
36953         default_version_info->next->prev = default_version_info->prev;
36954       first_v->prev = default_version_info;
36955       default_version_info->next = first_v;
36956       default_version_info->prev = NULL;
36959   default_node = default_version_info->this_node;
36961 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
36962   if (targetm.has_ifunc_p ())
36964       struct cgraph_function_version_info *it_v = NULL;
36965       struct cgraph_node *dispatcher_node = NULL;
36966       struct cgraph_function_version_info *dispatcher_version_info = NULL;
36968       /* Right now, the dispatching is done via ifunc.  */
36969       dispatch_decl = make_dispatcher_decl (default_node->decl);
36971       dispatcher_node = cgraph_node::get_create (dispatch_decl);
36972       gcc_assert (dispatcher_node != NULL);
36973       dispatcher_node->dispatcher_function = 1;
36974       dispatcher_version_info
36975 	= dispatcher_node->insert_new_function_version ();
36976       dispatcher_version_info->next = default_version_info;
36977       dispatcher_node->definition = 1;
36979       /* Set the dispatcher for all the versions.  */
36980       it_v = default_version_info;
36981       while (it_v != NULL)
36983 	  it_v->dispatcher_resolver = dispatch_decl;
/* No ifunc support on this target: multiversioning cannot work.  */
36990       error_at (DECL_SOURCE_LOCATION (default_node->decl),
36991 		"multiversioning needs ifunc which is not supported "
36995   return dispatch_decl;
36998 /* Make the resolver function decl to dispatch the versions of
36999    a multi-versioned function,  DEFAULT_DECL.  Create an
37000    empty basic block in the resolver and store the pointer in
37001    EMPTY_BB.  Return the decl of the resolver function. */
37004 make_resolver_func (const tree default_decl,
37005 		    const tree dispatch_decl,
37006 		    basic_block *empty_bb)
37008   char *resolver_name;
37009   tree decl, type, decl_name, t;
37010   bool is_uniq = false;
37012   /* IFUNC's have to be globally visible.  So, if the default_decl is
37013      not, then the name of the IFUNC should be made unique.  */
37014   if (TREE_PUBLIC (default_decl) == 0)
37017   /* Append the filename to the resolver function if the versions are
37018      not externally visible.  This is because the resolver function has
37019      to be externally visible for the loader to find it.  So, appending
37020      the filename will prevent conflicts with a resolver function from
37021      another module which is based on the same version name.  */
37022   resolver_name = make_name (default_decl, "resolver", is_uniq);
37024   /* The resolver function should return a (void *). */
37025   type = build_function_type_list (ptr_type_node, NULL_TREE);
37027   decl = build_fn_decl (resolver_name, type);
37028   decl_name = get_identifier (resolver_name);
37029   SET_DECL_ASSEMBLER_NAME (decl, decl_name);
37031   DECL_NAME (decl) = decl_name;
37032   TREE_USED (decl) = 1;
37033   DECL_ARTIFICIAL (decl) = 1;
37034   DECL_IGNORED_P (decl) = 0;
37035   /* IFUNC resolvers have to be externally visible.  */
37036   TREE_PUBLIC (decl) = 1;
37037   DECL_UNINLINABLE (decl) = 1;
37039   /* Resolver is not external, body is generated.  */
37040   DECL_EXTERNAL (decl) = 0;
37041   DECL_EXTERNAL (dispatch_decl) = 0;
37043   DECL_CONTEXT (decl) = NULL_TREE;
37044   DECL_INITIAL (decl) = make_node (BLOCK);
37045   DECL_STATIC_CONSTRUCTOR (decl) = 0;
37047   if (DECL_COMDAT_GROUP (default_decl)
37048       || TREE_PUBLIC (default_decl))
37050       /* In this case, each translation unit with a call to this
37051 	 versioned function will put out a resolver.  Ensure it
37052 	 is comdat to keep just one copy.  */
37053       DECL_COMDAT (decl) = 1;
37054       make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
37056   /* Build result decl and add to function_decl. */
37057   t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
37058   DECL_ARTIFICIAL (t) = 1;
37059   DECL_IGNORED_P (t) = 1;
37060   DECL_RESULT (decl) = t;
37062   gimplify_function_tree (decl);
37063   push_cfun (DECL_STRUCT_FUNCTION (decl));
37064   *empty_bb = init_lowered_empty_function (decl, false, 0);
37066   cgraph_node::add_new_function (decl, true);
37067   symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
37071   gcc_assert (dispatch_decl != NULL);
37072   /* Mark dispatch_decl as "ifunc" with resolver as resolver_name.  */
37073   DECL_ATTRIBUTES (dispatch_decl)
37074     = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
37076   /* Create the alias for dispatch to resolver here.  */
37077   /*cgraph_create_function_alias (dispatch_decl, decl);*/
37078   cgraph_node::create_same_body_alias (dispatch_decl, decl);
37079   XDELETEVEC (resolver_name);
37083 /* Generate the dispatching code body to dispatch multi-versioned function
37084    DECL.  The target hook is called to process the "target" attributes and
37085    provide the code to dispatch the right function at run-time.  NODE points
37086    to the dispatcher decl whose body will be created.  */
37089 ix86_generate_version_dispatcher_body (void *node_p)
37091   tree resolver_decl;
37092   basic_block empty_bb;
37093   tree default_ver_decl;
37094   struct cgraph_node *versn;
37095   struct cgraph_node *node;
37097   struct cgraph_function_version_info *node_version_info = NULL;
37098   struct cgraph_function_version_info *versn_info = NULL;
37100   node = (cgraph_node *)node_p;
37102   node_version_info = node->function_version ();
37103   gcc_assert (node->dispatcher_function
37104 	      && node_version_info != NULL);
/* Resolver already generated -- nothing to do.  */
37106   if (node_version_info->dispatcher_resolver)
37107     return node_version_info->dispatcher_resolver;
37109   /* The first version in the chain corresponds to the default version.  */
37110   default_ver_decl = node_version_info->next->this_node->decl;
37112   /* node is going to be an alias, so remove the finalized bit.  */
37113   node->definition = false;
37115   resolver_decl = make_resolver_func (default_ver_decl,
37116 				      node->decl, &empty_bb);
37118   node_version_info->dispatcher_resolver = resolver_decl;
37120   push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
37122   auto_vec<tree, 2> fn_ver_vec;
/* Gather every version decl in the chain (default first) for
   dispatch_function_versions.  */
37124   for (versn_info = node_version_info->next; versn_info;
37125        versn_info = versn_info->next)
37127       versn = versn_info->this_node;
37128       /* Check for virtual functions here again, as by this time it should
37129 	 have been determined if this function needs a vtable index or
37130 	 not.  This happens for methods in derived classes that override
37131 	 virtual methods in base classes but are not explicitly marked as
37133       if (DECL_VINDEX (versn->decl))
37134 	sorry ("Virtual function multiversioning not supported");
37136       fn_ver_vec.safe_push (versn->decl);
37139   dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
37140   cgraph_edge::rebuild_edges ();
37142   return resolver_decl;
37144 /* This builds the processor_model struct type defined in
37145    libgcc/config/i386/cpuinfo.c  */
37148 build_processor_model_struct (void)
37150   const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
37152   tree field = NULL_TREE, field_chain = NULL_TREE;
37154   tree type = make_node (RECORD_TYPE);
37156   /* The first 3 fields are unsigned int.  */
/* Fields are chained front-to-back, so FIELD_CHAIN ends up in reverse
   declaration order until finish_builtin_struct fixes the layout.  */
37157   for (i = 0; i < 3; ++i)
37159       field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
37160 			  get_identifier (field_name[i]), unsigned_type_node);
37161       if (field_chain != NULL_TREE)
37162 	DECL_CHAIN (field) = field_chain;
37163       field_chain = field;
37166   /* The last field is an array of unsigned integers of size one.  */
37167   field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
37168 		      get_identifier (field_name[3]),
37169 		      build_array_type (unsigned_type_node,
37170 					build_index_type (size_one_node)));
37171   if (field_chain != NULL_TREE)
37172     DECL_CHAIN (field) = field_chain;
37173   field_chain = field;
37175   finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
37179 /* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */
37182 make_var_decl (tree type, const char *name)
37186   new_decl = build_decl (UNKNOWN_LOCATION,
37188 			 get_identifier(name),
/* External definition lives in libgcc; keep the decl alive and
   visible so the reference resolves at link time.  */
37191   DECL_EXTERNAL (new_decl) = 1;
37192   TREE_STATIC (new_decl) = 1;
37193   TREE_PUBLIC (new_decl) = 1;
37194   DECL_INITIAL (new_decl) = 0;
37195   DECL_ARTIFICIAL (new_decl) = 0;
37196   DECL_PRESERVE_P (new_decl) = 1;
37198   make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
37199   assemble_variable (new_decl, 0, 0, 0);
37204 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
37205    into an integer defined in libgcc/config/i386/cpuinfo.c */
37208 fold_builtin_cpu (tree fndecl, tree *args)
37211   enum ix86_builtins fn_code = (enum ix86_builtins)
37212 				DECL_FUNCTION_CODE (fndecl);
37213   tree param_string_cst = NULL;
37215   /* This is the order of bit-fields in __processor_features in cpuinfo.c */
37216   enum processor_features
37249   /* These are the values for vendor types and cpu types and subtypes
37250      in cpuinfo.c.  Cpu types and subtypes should be subtracted by
37251      the corresponding start value.  */
37252   enum processor_model
37262     M_INTEL_SILVERMONT,
37266     M_CPU_SUBTYPE_START,
37267     M_INTEL_COREI7_NEHALEM,
37268     M_INTEL_COREI7_WESTMERE,
37269     M_INTEL_COREI7_SANDYBRIDGE,
37270     M_AMDFAM10H_BARCELONA,
37271     M_AMDFAM10H_SHANGHAI,
37272     M_AMDFAM10H_ISTANBUL,
37273     M_AMDFAM15H_BDVER1,
37274     M_AMDFAM15H_BDVER2,
37275     M_AMDFAM15H_BDVER3,
37276     M_AMDFAM15H_BDVER4,
37277     M_AMDFAM17H_ZNVER1,
37278     M_INTEL_COREI7_IVYBRIDGE,
37279     M_INTEL_COREI7_HASWELL,
37280     M_INTEL_COREI7_BROADWELL,
37281     M_INTEL_COREI7_SKYLAKE,
37282     M_INTEL_COREI7_SKYLAKE_AVX512
/* Maps __builtin_cpu_is argument strings to processor_model values.
   NOTE(review): must stay in sync with libgcc/config/i386/cpuinfo.c.  */
37285   static struct _arch_names_table
37287       const char *const name;
37288       const enum processor_model model;
37290   const arch_names_table[] =
37293       {"intel", M_INTEL},
37294       {"atom", M_INTEL_BONNELL},
37295       {"slm", M_INTEL_SILVERMONT},
37296       {"core2", M_INTEL_CORE2},
37297       {"corei7", M_INTEL_COREI7},
37298       {"nehalem", M_INTEL_COREI7_NEHALEM},
37299       {"westmere", M_INTEL_COREI7_WESTMERE},
37300       {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
37301       {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
37302       {"haswell", M_INTEL_COREI7_HASWELL},
37303       {"broadwell", M_INTEL_COREI7_BROADWELL},
37304       {"skylake", M_INTEL_COREI7_SKYLAKE},
37305       {"skylake-avx512", M_INTEL_COREI7_SKYLAKE_AVX512},
37306       {"bonnell", M_INTEL_BONNELL},
37307       {"silvermont", M_INTEL_SILVERMONT},
37308       {"knl", M_INTEL_KNL},
37309       {"amdfam10h", M_AMDFAM10H},
37310       {"barcelona", M_AMDFAM10H_BARCELONA},
37311       {"shanghai", M_AMDFAM10H_SHANGHAI},
37312       {"istanbul", M_AMDFAM10H_ISTANBUL},
37313       {"btver1", M_AMD_BTVER1},
37314       {"amdfam15h", M_AMDFAM15H},
37315       {"bdver1", M_AMDFAM15H_BDVER1},
37316       {"bdver2", M_AMDFAM15H_BDVER2},
37317       {"bdver3", M_AMDFAM15H_BDVER3},
37318       {"bdver4", M_AMDFAM15H_BDVER4},
37319       {"btver2", M_AMD_BTVER2},
37320       {"znver1", M_AMDFAM17H_ZNVER1},
/* Maps __builtin_cpu_supports argument strings to feature bits.  */
37323   static struct _isa_names_table
37325       const char *const name;
37326       const enum processor_features feature;
37328   const isa_names_table[] =
37332       {"popcnt", F_POPCNT},
37336       {"ssse3", F_SSSE3},
37337       {"sse4a", F_SSE4_A},
37338       {"sse4.1", F_SSE4_1},
37339       {"sse4.2", F_SSE4_2},
37345       {"avx512f", F_AVX512F},
37349       {"pclmul", F_PCLMUL},
37350       {"avx512vl",F_AVX512VL},
37351       {"avx512bw",F_AVX512BW},
37352       {"avx512dq",F_AVX512DQ},
37353       {"avx512cd",F_AVX512CD},
37354       {"avx512er",F_AVX512ER},
37355       {"avx512pf",F_AVX512PF},
37356       {"avx512vbmi",F_AVX512VBMI},
37357       {"avx512ifma",F_AVX512IFMA},
37360   tree __processor_model_type = build_processor_model_struct ();
37361   tree __cpu_model_var = make_var_decl (__processor_model_type,
37365   varpool_node::add (__cpu_model_var);
37367   gcc_assert ((args != NULL) && (*args != NULL));
/* Strip expression wrappers (e.g. NOP_EXPRs) until the underlying
   STRING_CST argument is reached; anything else is an error.  */
37369   param_string_cst = *args;
37370   while (param_string_cst
37371 	 && TREE_CODE (param_string_cst) !=  STRING_CST)
37373       /* *args must be a expr that can contain other EXPRS leading to a
37375       if (!EXPR_P (param_string_cst))
37377 	  error ("Parameter to builtin must be a string constant or literal");
37378 	  return integer_zero_node;
37380       param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
37383   gcc_assert (param_string_cst);
37385   if (fn_code == IX86_BUILTIN_CPU_IS)
37391       unsigned int field_val = 0;
37392       unsigned int NUM_ARCH_NAMES
37393 	= sizeof (arch_names_table) / sizeof (struct _arch_names_table);
37395       for (i = 0; i < NUM_ARCH_NAMES; i++)
37396 	if (strcmp (arch_names_table[i].name,
37397 	    TREE_STRING_POINTER (param_string_cst)) == 0)
37400       if (i == NUM_ARCH_NAMES)
37402 	  error ("Parameter to builtin not valid: %s",
37403 	         TREE_STRING_POINTER (param_string_cst));
37404 	  return integer_zero_node;
37407       field = TYPE_FIELDS (__processor_model_type);
37408       field_val = arch_names_table[i].model;
37410       /* CPU types are stored in the next field.  */
37411       if (field_val > M_CPU_TYPE_START
37412 	  && field_val < M_CPU_SUBTYPE_START)
37414 	  field = DECL_CHAIN (field);
37415 	  field_val -= M_CPU_TYPE_START;
37418       /* CPU subtypes are stored in the next field.  */
37419       if (field_val > M_CPU_SUBTYPE_START)
37421 	  field = DECL_CHAIN ( DECL_CHAIN (field));
37422 	  field_val -= M_CPU_SUBTYPE_START;
37425       /* Get the appropriate field in __cpu_model.  */
37426       ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
37429       /* Check the value.  */
37430       final = build2 (EQ_EXPR, unsigned_type_node, ref,
37431 		      build_int_cstu (unsigned_type_node, field_val));
37432       return build1 (CONVERT_EXPR, integer_type_node, final);
37434   else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
37441       unsigned int field_val = 0;
37442       unsigned int NUM_ISA_NAMES
37443 	= sizeof (isa_names_table) / sizeof (struct _isa_names_table);
37445       for (i = 0; i < NUM_ISA_NAMES; i++)
37446 	if (strcmp (isa_names_table[i].name,
37447 	    TREE_STRING_POINTER (param_string_cst)) == 0)
37450       if (i == NUM_ISA_NAMES)
37452 	  error ("Parameter to builtin not valid: %s",
37453 	       	 TREE_STRING_POINTER (param_string_cst));
37454 	  return integer_zero_node;
37457       field = TYPE_FIELDS (__processor_model_type);
37458       /* Get the last field, which is __cpu_features.  */
37459       while (DECL_CHAIN (field))
37460         field = DECL_CHAIN (field);
37462       /* Get the appropriate field: __cpu_model.__cpu_features  */
37463       ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
37466       /* Access the 0th element of __cpu_features array.  */
37467       array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
37468 			  integer_zero_node, NULL_TREE, NULL_TREE);
/* NOTE(review): 1 << feature assumes all listed features fit in the
   first 32-bit word of __cpu_features -- confirm against cpuinfo.c.  */
37470       field_val = (1 << isa_names_table[i].feature);
37471       /* Return __cpu_model.__cpu_features[0] & field_val  */
37472       final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
37473 		      build_int_cstu (unsigned_type_node, field_val));
37474       return build1 (CONVERT_EXPR, integer_type_node, final);
37476   gcc_unreachable ();
/* Target hook: fold target-specific builtins.  Currently only
   __builtin_cpu_is / __builtin_cpu_supports are folded here; anything
   else is delegated to the subtarget hook when one exists.  */
37480 ix86_fold_builtin (tree fndecl, int n_args,
37481 		   tree *args, bool ignore ATTRIBUTE_UNUSED)
37483   if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
37485       enum ix86_builtins fn_code = (enum ix86_builtins)
37486 				   DECL_FUNCTION_CODE (fndecl);
37487       if (fn_code ==  IX86_BUILTIN_CPU_IS
37488 	  || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
37490 	  gcc_assert (n_args == 1);
37491 	  return fold_builtin_cpu (fndecl, args);
37495 #ifdef SUBTARGET_FOLD_BUILTIN
37496   return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
37502 /* Make builtins to detect cpu type and features supported.  NAME is
37503    the builtin name, CODE is the builtin code, and FTYPE is the function
37504    type of the builtin.  */
37507 make_cpu_type_builtin (const char* name, int code,
37508 		       enum ix86_builtin_func_type ftype, bool is_const)
37513   type = ix86_get_builtin_func_type (ftype);
37514   decl = add_builtin_function (name, type, code, BUILT_IN_MD,
/* Cache the decl so later expansion can look it up by builtin code.  */
37516   gcc_assert (decl != NULL_TREE);
37517   ix86_builtins[(int) code] = decl;
37518   TREE_READONLY (decl) = is_const;
37521 /* Make builtins to get CPU type and features supported.  The created
37524    __builtin_cpu_init (), to detect cpu type and features,
37525    __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
37526    __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
37530 ix86_init_platform_type_builtins (void)
/* cpu_init has side effects; cpu_is / cpu_supports are pure queries and
   therefore registered as const.  */
37532   make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
37533 			 INT_FTYPE_VOID, false);
37534   make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
37535 			 INT_FTYPE_PCCHAR, true);
37536   make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
37537 			 INT_FTYPE_PCCHAR, true);
37540 /* Internal method for ix86_init_builtins.  */
37543 ix86_init_builtins_va_builtins_abi (void)
37545   tree ms_va_ref, sysv_va_ref;
37546   tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
37547   tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
37548   tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
37549   tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
/* Each builtin is tagged with the ABI attribute it belongs to so the
   right va_list flavor is used regardless of the default ABI.  */
37553   fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
37554   fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
37555   ms_va_ref = build_reference_type (ms_va_list_type_node);
37557 	build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
37560 	build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
37561   fnvoid_va_start_ms =
37562 	build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
37563   fnvoid_va_end_sysv =
37564 	build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
37565   fnvoid_va_start_sysv =
37566 	build_varargs_function_type_list (void_type_node, sysv_va_ref,
37568   fnvoid_va_copy_ms =
37569 	build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
37571   fnvoid_va_copy_sysv =
37572 	build_function_type_list (void_type_node, sysv_va_ref,
37573 				  sysv_va_ref, NULL_TREE);
37575   add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
37576   			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
37577   add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
37578   			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
37579   add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
37580 			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
37581   add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
37582   			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
37583   add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
37584   			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
37585   add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
37586 			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Register the ix86-specific builtin scalar types (__float80,
   __float128) and the primitive types used by the builtin machinery.  */
37590 ix86_init_builtin_types (void)
37592   tree float128_type_node, float80_type_node;
37594   /* The __float80 type.  */
37595   float80_type_node = long_double_type_node;
37596   if (TYPE_MODE (float80_type_node) != XFmode)
37598       /* The __float80 type.  */
/* long double is not 80-bit extended on this configuration, so build a
   distinct 80-bit REAL_TYPE for __float80.  */
37599       float80_type_node = make_node (REAL_TYPE);
37601       TYPE_PRECISION (float80_type_node) = 80;
37602       layout_type (float80_type_node);
37604   lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
37606   /* The __float128 type.  */
37607   float128_type_node = make_node (REAL_TYPE);
37608   TYPE_PRECISION (float128_type_node) = 128;
37609   layout_type (float128_type_node);
37610   lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
37612   /* This macro is built by i386-builtin-types.awk.  */
37613   DEFINE_BUILTIN_PRIMITIVE_TYPES;
/* Target hook: set up all ix86 builtin functions -- types first, then
   platform-detection, TFmode, TM, MMX/SSE, MPX and va_list builtins.  */
37617 ix86_init_builtins (void)
37621   ix86_init_builtin_types ();
37623   /* Builtins to get CPU type and features. */
37624   ix86_init_platform_type_builtins ();
37626   /* TFmode support builtins.  */
37627   def_builtin_const (0, "__builtin_infq",
37628 		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
37629   def_builtin_const (0, "__builtin_huge_valq",
37630 		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
37632   /* We will expand them to normal call if SSE isn't available since
37633      they are used by libgcc.  */
37634   t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
37635   t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
37636 			    BUILT_IN_MD, "__fabstf2", NULL_TREE);
37637   TREE_READONLY (t) = 1;
37638   ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
37640   t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
37641   t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
37642 			    BUILT_IN_MD, "__copysigntf3", NULL_TREE);
37643   TREE_READONLY (t) = 1;
37644   ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
37646   ix86_init_tm_builtins ();
37647   ix86_init_mmx_sse_builtins ();
37648   ix86_init_mpx_builtins ();
37651     ix86_init_builtins_va_builtins_abi ();
37653 #ifdef SUBTARGET_INIT_BUILTINS
37654   SUBTARGET_INIT_BUILTINS;
/* Implement TARGET_BUILTIN_DECL: return the FUNCTION_DECL registered
   for the ix86 builtin CODE, or error_mark_node if CODE is out of
   range.  The bool parameter (initialize_p) is unused here.  */
37658 /* Return the ix86 builtin for CODE. */
37661 ix86_builtin_decl (unsigned code, bool)
37663 if (code >= IX86_BUILTIN_MAX)
37664 return error_mark_node;
37666 return ix86_builtins[code];
37669 /* Errors in the source file can cause expand_expr to return const0_rtx
37670 where we expect a vector. To avoid crashing, use one of the vector
37671 clear instructions. */
37673 safe_vector_operand (rtx x, machine_mode mode)
/* Replace the scalar zero with the all-zero vector constant of MODE,
   which every vector predicate below accepts.  */
37675 if (x == const0_rtx)
37676 x = CONST0_RTX (mode);
37680 /* Fixup modeless constants to fit required mode. */
37682 fixup_modeless_constant (rtx x, machine_mode mode)
/* Integer constants carry VOIDmode; zero-extend (unsignedp = 1) into
   the mode the insn pattern requires.  */
37684 if (GET_MODE (x) == VOIDmode)
37685 x = convert_to_mode (mode, x, 1);
37689 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expand the two-argument builtin call EXP using insn pattern ICODE,
   placing the result in TARGET (or a fresh register when TARGET is
   unsuitable).  Returns the result rtx.  */
37692 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
37695 tree arg0 = CALL_EXPR_ARG (exp, 0);
37696 tree arg1 = CALL_EXPR_ARG (exp, 1);
37697 rtx op0 = expand_normal (arg0);
37698 rtx op1 = expand_normal (arg1);
/* Modes required by the insn's result and two input operands.  */
37699 machine_mode tmode = insn_data[icode].operand[0].mode;
37700 machine_mode mode0 = insn_data[icode].operand[1].mode;
37701 machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against error-recovery const0_rtx in vector positions.  */
37703 if (VECTOR_MODE_P (mode0))
37704 op0 = safe_vector_operand (op0, mode0);
37705 if (VECTOR_MODE_P (mode1))
37706 op1 = safe_vector_operand (op1, mode1);
37708 if (optimize || !target
37709 || GET_MODE (target) != tmode
37710 || !insn_data[icode].operand[0].predicate (target, tmode))
37711 target = gen_reg_rtx (tmode);
/* An SImode second operand feeding a TImode pattern is widened by
   loading it into the low element of a V4SI register (movd) and
   reinterpreting that register as TImode.  */
37713 if (GET_MODE (op1) == SImode && mode1 == TImode)
37715 rtx x = gen_reg_rtx (V4SImode);
37716 emit_insn (gen_sse2_loadd (x, op1));
37717 op1 = gen_lowpart (TImode, x);
/* Force operands into registers when they fail the predicates.  */
37720 if (!insn_data[icode].operand[1].predicate (op0, mode0))
37721 op0 = copy_to_mode_reg (mode0, op0);
37722 if (!insn_data[icode].operand[2].predicate (op1, mode1))
37723 op1 = copy_to_mode_reg (mode1, op1);
37725 pat = GEN_FCN (icode) (target, op0, op1);
37734 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* Expand builtin call EXP with pattern ICODE.  M_TYPE selects the
   argument-count/flavour class (MULTI_ARG_*); SUB_CODE supplies the
   rtx comparison/operation code for the comparison forms.  Returns
   the result rtx placed in TARGET or a fresh register.  */
37737 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
37738 enum ix86_builtin_func_type m_type,
37739 enum rtx_code sub_code)
/* comparison_p: pattern takes an extra embedded comparison operand.  */
37744 bool comparison_p = false;
/* last_arg_constant: final call argument must be an immediate.  */
37746 bool last_arg_constant = false;
37747 int num_memory = 0;
37753 machine_mode tmode = insn_data[icode].operand[0].mode;
/* Classify M_TYPE: each case group sets the argument count and the
   flags above (nargs assignments sit between the case labels).  */
37757 case MULTI_ARG_4_DF2_DI_I:
37758 case MULTI_ARG_4_DF2_DI_I1:
37759 case MULTI_ARG_4_SF2_SI_I:
37760 case MULTI_ARG_4_SF2_SI_I1:
37762 last_arg_constant = true;
37765 case MULTI_ARG_3_SF:
37766 case MULTI_ARG_3_DF:
37767 case MULTI_ARG_3_SF2:
37768 case MULTI_ARG_3_DF2:
37769 case MULTI_ARG_3_DI:
37770 case MULTI_ARG_3_SI:
37771 case MULTI_ARG_3_SI_DI:
37772 case MULTI_ARG_3_HI:
37773 case MULTI_ARG_3_HI_SI:
37774 case MULTI_ARG_3_QI:
37775 case MULTI_ARG_3_DI2:
37776 case MULTI_ARG_3_SI2:
37777 case MULTI_ARG_3_HI2:
37778 case MULTI_ARG_3_QI2:
37782 case MULTI_ARG_2_SF:
37783 case MULTI_ARG_2_DF:
37784 case MULTI_ARG_2_DI:
37785 case MULTI_ARG_2_SI:
37786 case MULTI_ARG_2_HI:
37787 case MULTI_ARG_2_QI:
37791 case MULTI_ARG_2_DI_IMM:
37792 case MULTI_ARG_2_SI_IMM:
37793 case MULTI_ARG_2_HI_IMM:
37794 case MULTI_ARG_2_QI_IMM:
37796 last_arg_constant = true;
37799 case MULTI_ARG_1_SF:
37800 case MULTI_ARG_1_DF:
37801 case MULTI_ARG_1_SF2:
37802 case MULTI_ARG_1_DF2:
37803 case MULTI_ARG_1_DI:
37804 case MULTI_ARG_1_SI:
37805 case MULTI_ARG_1_HI:
37806 case MULTI_ARG_1_QI:
37807 case MULTI_ARG_1_SI_DI:
37808 case MULTI_ARG_1_HI_DI:
37809 case MULTI_ARG_1_HI_SI:
37810 case MULTI_ARG_1_QI_DI:
37811 case MULTI_ARG_1_QI_SI:
37812 case MULTI_ARG_1_QI_HI:
37816 case MULTI_ARG_2_DI_CMP:
37817 case MULTI_ARG_2_SI_CMP:
37818 case MULTI_ARG_2_HI_CMP:
37819 case MULTI_ARG_2_QI_CMP:
37821 comparison_p = true;
37824 case MULTI_ARG_2_SF_TF:
37825 case MULTI_ARG_2_DF_TF:
37826 case MULTI_ARG_2_DI_TF:
37827 case MULTI_ARG_2_SI_TF:
37828 case MULTI_ARG_2_HI_TF:
37829 case MULTI_ARG_2_QI_TF:
37835 gcc_unreachable ();
37838 if (optimize || !target
37839 || GET_MODE (target) != tmode
37840 || !insn_data[icode].operand[0].predicate (target, tmode))
37841 target = gen_reg_rtx (tmode);
37843 gcc_assert (nargs <= 4);
/* Expand and legitimize each call argument.  For comparison patterns
   the embedded comparison occupies operand 1, so input operands are
   shifted by one (ADJUST).  */
37845 for (i = 0; i < nargs; i++)
37847 tree arg = CALL_EXPR_ARG (exp, i);
37848 rtx op = expand_normal (arg);
37849 int adjust = (comparison_p) ? 1 : 0;
37850 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
37852 if (last_arg_constant && i == nargs - 1)
37854 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
37856 enum insn_code new_icode = icode;
/* Special handling when the required immediate fails the predicate.  */
37859 case CODE_FOR_xop_vpermil2v2df3:
37860 case CODE_FOR_xop_vpermil2v4sf3:
37861 case CODE_FOR_xop_vpermil2v4df3:
37862 case CODE_FOR_xop_vpermil2v8sf3:
37863 error ("the last argument must be a 2-bit immediate");
37864 return gen_reg_rtx (tmode);
/* XOP rotates with a non-immediate count fall back to the generic
   rotate patterns.  */
37865 case CODE_FOR_xop_rotlv2di3:
37866 new_icode = CODE_FOR_rotlv2di3;
37868 case CODE_FOR_xop_rotlv4si3:
37869 new_icode = CODE_FOR_rotlv4si3;
37871 case CODE_FOR_xop_rotlv8hi3:
37872 new_icode = CODE_FOR_rotlv8hi3;
37874 case CODE_FOR_xop_rotlv16qi3:
37875 new_icode = CODE_FOR_rotlv16qi3;
/* A constant rotate count is reduced modulo the element width
   (mask = unit bitsize - 1) so it satisfies the predicate.  */
37877 if (CONST_INT_P (op))
37879 int mask = GET_MODE_UNIT_BITSIZE (tmode) - 1;
37880 op = GEN_INT (INTVAL (op) & mask);
37881 gcc_checking_assert
37882 (insn_data[icode].operand[i + 1].predicate (op, mode));
/* The replacement pattern must agree with the original on the
   result/input modes and predicates for the substitution to be
   valid.  */
37886 gcc_checking_assert
37888 && insn_data[new_icode].operand[0].mode == tmode
37889 && insn_data[new_icode].operand[1].mode == tmode
37890 && insn_data[new_icode].operand[2].mode == mode
37891 && insn_data[new_icode].operand[0].predicate
37892 == insn_data[icode].operand[0].predicate
37893 && insn_data[new_icode].operand[1].predicate
37894 == insn_data[icode].operand[1].predicate);
37900 gcc_unreachable ();
37907 if (VECTOR_MODE_P (mode))
37908 op = safe_vector_operand (op, mode);
37910 /* If we aren't optimizing, only allow one memory operand to be
37912 if (memory_operand (op, mode))
37915 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
37918 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
37920 op = force_reg (mode, op);
37924 args[i].mode = mode;
/* Emit the insn; the shape of the call depends on NARGS and on
   whether a comparison rtx or SUB_CODE immediate must be inserted.  */
37930 pat = GEN_FCN (icode) (target, args[0].op);
37935 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37936 GEN_INT ((int)sub_code));
37937 else if (! comparison_p)
37938 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37941 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
37945 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
37950 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
37954 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
37958 gcc_unreachable ();
37968 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
37969 insns with vec_merge. */
/* Expand single-argument builtin EXP via pattern ICODE.  The pattern's
   vec_merge form needs the input twice: once as the operated-on value
   (op0) and once as the pass-through source (op1).  */
37972 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
37976 tree arg0 = CALL_EXPR_ARG (exp, 0);
37977 rtx op1, op0 = expand_normal (arg0);
37978 machine_mode tmode = insn_data[icode].operand[0].mode;
37979 machine_mode mode0 = insn_data[icode].operand[1].mode;
37981 if (optimize || !target
37982 || GET_MODE (target) != tmode
37983 || !insn_data[icode].operand[0].predicate (target, tmode))
37984 target = gen_reg_rtx (tmode);
37986 if (VECTOR_MODE_P (mode0))
37987 op0 = safe_vector_operand (op0, mode0);
37989 if ((optimize && !register_operand (op0, mode0))
37990 || !insn_data[icode].operand[1].predicate (op0, mode0))
37991 op0 = copy_to_mode_reg (mode0, op0);
/* Second operand duplicates the (legitimized) first.  */
37994 if (!insn_data[icode].operand[2].predicate (op1, mode0))
37995 op1 = copy_to_mode_reg (mode0, op1);
37997 pat = GEN_FCN (icode) (target, op0, op1);
38004 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expand the two-argument SSE comparison described by D (which carries
   the insn code and rtx comparison) for call EXP.  SWAP requests
   operand exchange for comparisons only available in one direction.
   Result goes to TARGET or a fresh register.  */
38007 ix86_expand_sse_compare (const struct builtin_description *d,
38008 tree exp, rtx target, bool swap)
38011 tree arg0 = CALL_EXPR_ARG (exp, 0);
38012 tree arg1 = CALL_EXPR_ARG (exp, 1);
38013 rtx op0 = expand_normal (arg0);
38014 rtx op1 = expand_normal (arg1);
38016 machine_mode tmode = insn_data[d->icode].operand[0].mode;
38017 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
38018 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
38019 enum rtx_code comparison = d->comparison;
38021 if (VECTOR_MODE_P (mode0))
38022 op0 = safe_vector_operand (op0, mode0);
38023 if (VECTOR_MODE_P (mode1))
38024 op1 = safe_vector_operand (op1, mode1);
38026 /* Swap operands if we have a comparison that isn't available in
38029 std::swap (op0, op1);
38031 if (optimize || !target
38032 || GET_MODE (target) != tmode
38033 || !insn_data[d->icode].operand[0].predicate (target, tmode))
38034 target = gen_reg_rtx (tmode);
38036 if ((optimize && !register_operand (op0, mode0))
38037 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
38038 op0 = copy_to_mode_reg (mode0, op0);
38039 if ((optimize && !register_operand (op1, mode1))
38040 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
38041 op1 = copy_to_mode_reg (mode1, op1);
/* The pattern takes the comparison rtx as an explicit operand.  */
38043 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
38044 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
38051 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expand a comiss/comisd-style builtin described by D for call EXP.
   The insn sets the flags; the result is materialized as a 0/1 value
   by a setcc into the low byte of a zeroed SImode register.  */
38054 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
38058 tree arg0 = CALL_EXPR_ARG (exp, 0);
38059 tree arg1 = CALL_EXPR_ARG (exp, 1);
38060 rtx op0 = expand_normal (arg0);
38061 rtx op1 = expand_normal (arg1);
38062 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
38063 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
38064 enum rtx_code comparison = d->comparison;
38066 if (VECTOR_MODE_P (mode0))
38067 op0 = safe_vector_operand (op0, mode0);
38068 if (VECTOR_MODE_P (mode1))
38069 op1 = safe_vector_operand (op1, mode1);
38071 /* Swap operands if we have a comparison that isn't available in
38073 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
38074 std::swap (op0, op1);
/* Zero the full SImode result, then write only its QImode low part,
   so the upper bits are well-defined.  */
38076 target = gen_reg_rtx (SImode);
38077 emit_move_insn (target, const0_rtx);
38078 target = gen_rtx_SUBREG (QImode, target, 0);
38080 if ((optimize && !register_operand (op0, mode0))
38081 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
38082 op0 = copy_to_mode_reg (mode0, op0);
38083 if ((optimize && !register_operand (op1, mode1))
38084 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
38085 op1 = copy_to_mode_reg (mode1, op1);
38087 pat = GEN_FCN (d->icode) (op0, op1);
/* setcc on the comparison of the flags register into the low byte.  */
38091 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
38092 gen_rtx_fmt_ee (comparison, QImode,
/* Return the containing SImode register, not the QImode subreg.  */
38096 return SUBREG_REG (target);
38099 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
/* Expand a one-argument rounding builtin described by D for call EXP.
   D->comparison is reused here to carry the rounding-mode immediate
   passed as the insn's last operand.  */
38102 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
38106 tree arg0 = CALL_EXPR_ARG (exp, 0);
38107 rtx op1, op0 = expand_normal (arg0);
38108 machine_mode tmode = insn_data[d->icode].operand[0].mode;
38109 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
38111 if (optimize || target == 0
38112 || GET_MODE (target) != tmode
38113 || !insn_data[d->icode].operand[0].predicate (target, tmode))
38114 target = gen_reg_rtx (tmode);
38116 if (VECTOR_MODE_P (mode0))
38117 op0 = safe_vector_operand (op0, mode0);
38119 if ((optimize && !register_operand (op0, mode0))
38120 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
38121 op0 = copy_to_mode_reg (mode0, op0);
/* Rounding-mode immediate, stored in the descriptor.  */
38123 op1 = GEN_INT (d->comparison);
38125 pat = GEN_FCN (d->icode) (target, op0, op1);
/* Expand a two-argument round-and-pack builtin described by D for call
   EXP: both vector inputs are rounded and packed into the integer
   result vector.  D->comparison carries the rounding-mode immediate.  */
38133 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
38134 tree exp, rtx target)
38137 tree arg0 = CALL_EXPR_ARG (exp, 0);
38138 tree arg1 = CALL_EXPR_ARG (exp, 1);
38139 rtx op0 = expand_normal (arg0);
38140 rtx op1 = expand_normal (arg1);
38142 machine_mode tmode = insn_data[d->icode].operand[0].mode;
38143 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
38144 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
38146 if (optimize || target == 0
38147 || GET_MODE (target) != tmode
38148 || !insn_data[d->icode].operand[0].predicate (target, tmode))
38149 target = gen_reg_rtx (tmode);
/* Both operand modes are vectors here, so no VECTOR_MODE_P check.  */
38151 op0 = safe_vector_operand (op0, mode0);
38152 op1 = safe_vector_operand (op1, mode1);
38154 if ((optimize && !register_operand (op0, mode0))
38155 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
38156 op0 = copy_to_mode_reg (mode0, op0);
38157 if ((optimize && !register_operand (op1, mode1))
38158 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
38159 op1 = copy_to_mode_reg (mode1, op1);
/* Rounding-mode immediate, stored in the descriptor.  */
38161 op2 = GEN_INT (d->comparison);
38163 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
38170 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Expand a ptest/vtestp* builtin described by D for call EXP.  The
   insn sets the flags; the 0/1 result is produced by a setcc into the
   low byte of a zeroed SImode register, as in ix86_expand_sse_comi.  */
38173 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
38177 tree arg0 = CALL_EXPR_ARG (exp, 0);
38178 tree arg1 = CALL_EXPR_ARG (exp, 1);
38179 rtx op0 = expand_normal (arg0);
38180 rtx op1 = expand_normal (arg1);
38181 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
38182 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
38183 enum rtx_code comparison = d->comparison;
38185 if (VECTOR_MODE_P (mode0))
38186 op0 = safe_vector_operand (op0, mode0);
38187 if (VECTOR_MODE_P (mode1))
38188 op1 = safe_vector_operand (op1, mode1);
/* Zero full SImode result, then set only its QImode low part.  */
38190 target = gen_reg_rtx (SImode);
38191 emit_move_insn (target, const0_rtx);
38192 target = gen_rtx_SUBREG (QImode, target, 0);
38194 if ((optimize && !register_operand (op0, mode0))
38195 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
38196 op0 = copy_to_mode_reg (mode0, op0);
38197 if ((optimize && !register_operand (op1, mode1))
38198 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
38199 op1 = copy_to_mode_reg (mode1, op1);
38201 pat = GEN_FCN (d->icode) (op0, op1);
38205 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
38206 gen_rtx_fmt_ee (comparison, QImode,
38210 return SUBREG_REG (target);
38213 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Expand a five-argument SSE4.2 pcmpestri/pcmpestrm builtin described
   by D for call EXP.  The insn pattern produces two results (index and
   mask); the one the builtin does not want goes into a scratch reg.
   For the flag-extracting variants (neither I128 nor M128 code), the
   requested flag bit is read back via setcc — d->flag then names the
   flags-register mode.  */
38216 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
38217 tree exp, rtx target)
38220 tree arg0 = CALL_EXPR_ARG (exp, 0);
38221 tree arg1 = CALL_EXPR_ARG (exp, 1);
38222 tree arg2 = CALL_EXPR_ARG (exp, 2);
38223 tree arg3 = CALL_EXPR_ARG (exp, 3);
38224 tree arg4 = CALL_EXPR_ARG (exp, 4);
38225 rtx scratch0, scratch1;
38226 rtx op0 = expand_normal (arg0);
38227 rtx op1 = expand_normal (arg1);
38228 rtx op2 = expand_normal (arg2);
38229 rtx op3 = expand_normal (arg3);
38230 rtx op4 = expand_normal (arg4);
/* Pattern operands: two results, vector/length pairs for each string,
   and the control immediate.  */
38231 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
38233 tmode0 = insn_data[d->icode].operand[0].mode;
38234 tmode1 = insn_data[d->icode].operand[1].mode;
38235 modev2 = insn_data[d->icode].operand[2].mode;
38236 modei3 = insn_data[d->icode].operand[3].mode;
38237 modev4 = insn_data[d->icode].operand[4].mode;
38238 modei5 = insn_data[d->icode].operand[5].mode;
38239 modeimm = insn_data[d->icode].operand[6].mode;
38241 if (VECTOR_MODE_P (modev2))
38242 op0 = safe_vector_operand (op0, modev2);
38243 if (VECTOR_MODE_P (modev4))
38244 op2 = safe_vector_operand (op2, modev4);
38246 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
38247 op0 = copy_to_mode_reg (modev2, op0);
38248 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
38249 op1 = copy_to_mode_reg (modei3, op1);
38250 if ((optimize && !register_operand (op2, modev4))
38251 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
38252 op2 = copy_to_mode_reg (modev4, op2);
38253 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
38254 op3 = copy_to_mode_reg (modei5, op3);
/* The control operand must be an immediate; diagnose otherwise.  */
38256 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
38258 error ("the fifth argument must be an 8-bit immediate");
/* pcmpestri: the index result is wanted, mask goes to scratch.  */
38262 if (d->code == IX86_BUILTIN_PCMPESTRI128)
38264 if (optimize || !target
38265 || GET_MODE (target) != tmode0
38266 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
38267 target = gen_reg_rtx (tmode0);
38269 scratch1 = gen_reg_rtx (tmode1);
38271 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
/* pcmpestrm: the mask result is wanted, index goes to scratch.  */
38273 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
38275 if (optimize || !target
38276 || GET_MODE (target) != tmode1
38277 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
38278 target = gen_reg_rtx (tmode1);
38280 scratch0 = gen_reg_rtx (tmode0);
38282 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag-extracting variant: both results are scratch, the flag bit is
   what the builtin returns.  */
38286 gcc_assert (d->flag);
38288 scratch0 = gen_reg_rtx (tmode0);
38289 scratch1 = gen_reg_rtx (tmode1);
38291 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
/* Materialize the flag as 0/1 via setcc on the low byte of a zeroed
   SImode register.  */
38301 target = gen_reg_rtx (SImode);
38302 emit_move_insn (target, const0_rtx);
38303 target = gen_rtx_SUBREG (QImode, target, 0);
38306 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
38307 gen_rtx_fmt_ee (EQ, QImode,
38308 gen_rtx_REG ((machine_mode) d->flag,
38311 return SUBREG_REG (target);
38318 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* Expand a three-argument SSE4.2 pcmpistri/pcmpistrm builtin described
   by D for call EXP.  Structure mirrors ix86_expand_sse_pcmpestr but
   without the explicit-length operands: two vector inputs plus the
   control immediate; the unwanted result goes to a scratch register,
   and flag variants read the flag bit back via setcc.  */
38321 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
38322 tree exp, rtx target)
38325 tree arg0 = CALL_EXPR_ARG (exp, 0);
38326 tree arg1 = CALL_EXPR_ARG (exp, 1);
38327 tree arg2 = CALL_EXPR_ARG (exp, 2);
38328 rtx scratch0, scratch1;
38329 rtx op0 = expand_normal (arg0);
38330 rtx op1 = expand_normal (arg1);
38331 rtx op2 = expand_normal (arg2);
38332 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
38334 tmode0 = insn_data[d->icode].operand[0].mode;
38335 tmode1 = insn_data[d->icode].operand[1].mode;
38336 modev2 = insn_data[d->icode].operand[2].mode;
38337 modev3 = insn_data[d->icode].operand[3].mode;
38338 modeimm = insn_data[d->icode].operand[4].mode;
38340 if (VECTOR_MODE_P (modev2))
38341 op0 = safe_vector_operand (op0, modev2);
38342 if (VECTOR_MODE_P (modev3))
38343 op1 = safe_vector_operand (op1, modev3);
38345 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
38346 op0 = copy_to_mode_reg (modev2, op0);
38347 if ((optimize && !register_operand (op1, modev3))
38348 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
38349 op1 = copy_to_mode_reg (modev3, op1);
/* The control operand must be an immediate; diagnose otherwise.  */
38351 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
38353 error ("the third argument must be an 8-bit immediate");
/* pcmpistri: the index result is wanted, mask goes to scratch.  */
38357 if (d->code == IX86_BUILTIN_PCMPISTRI128)
38359 if (optimize || !target
38360 || GET_MODE (target) != tmode0
38361 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
38362 target = gen_reg_rtx (tmode0);
38364 scratch1 = gen_reg_rtx (tmode1);
38366 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
/* pcmpistrm: the mask result is wanted, index goes to scratch.  */
38368 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
38370 if (optimize || !target
38371 || GET_MODE (target) != tmode1
38372 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
38373 target = gen_reg_rtx (tmode1);
38375 scratch0 = gen_reg_rtx (tmode0);
38377 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag-extracting variant: both results are scratch.  */
38381 gcc_assert (d->flag);
38383 scratch0 = gen_reg_rtx (tmode0);
38384 scratch1 = gen_reg_rtx (tmode1);
38386 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
/* Materialize the flag as 0/1 via setcc on the low byte of a zeroed
   SImode register.  */
38396 target = gen_reg_rtx (SImode);
38397 emit_move_insn (target, const0_rtx);
38398 target = gen_rtx_SUBREG (QImode, target, 0);
38401 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
38402 gen_rtx_fmt_ee (EQ, QImode,
38403 gen_rtx_REG ((machine_mode) d->flag,
38406 return SUBREG_REG (target);
38412 /* Subroutine of ix86_expand_builtin to take care of insns with
38413 variable number of operands. */
38416 ix86_expand_args_builtin (const struct builtin_description *d,
38417 tree exp, rtx target)
38419 rtx pat, real_target;
38420 unsigned int i, nargs;
38421 unsigned int nargs_constant = 0;
38422 unsigned int mask_pos = 0;
38423 int num_memory = 0;
38429 bool last_arg_count = false;
38430 enum insn_code icode = d->icode;
38431 const struct insn_data_d *insn_p = &insn_data[icode];
38432 machine_mode tmode = insn_p->operand[0].mode;
38433 machine_mode rmode = VOIDmode;
38435 enum rtx_code comparison = d->comparison;
38437 switch ((enum ix86_builtin_func_type) d->flag)
38439 case V2DF_FTYPE_V2DF_ROUND:
38440 case V4DF_FTYPE_V4DF_ROUND:
38441 case V4SF_FTYPE_V4SF_ROUND:
38442 case V8SF_FTYPE_V8SF_ROUND:
38443 case V4SI_FTYPE_V4SF_ROUND:
38444 case V8SI_FTYPE_V8SF_ROUND:
38445 return ix86_expand_sse_round (d, exp, target);
38446 case V4SI_FTYPE_V2DF_V2DF_ROUND:
38447 case V8SI_FTYPE_V4DF_V4DF_ROUND:
38448 case V16SI_FTYPE_V8DF_V8DF_ROUND:
38449 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
38450 case INT_FTYPE_V8SF_V8SF_PTEST:
38451 case INT_FTYPE_V4DI_V4DI_PTEST:
38452 case INT_FTYPE_V4DF_V4DF_PTEST:
38453 case INT_FTYPE_V4SF_V4SF_PTEST:
38454 case INT_FTYPE_V2DI_V2DI_PTEST:
38455 case INT_FTYPE_V2DF_V2DF_PTEST:
38456 return ix86_expand_sse_ptest (d, exp, target);
38457 case FLOAT128_FTYPE_FLOAT128:
38458 case FLOAT_FTYPE_FLOAT:
38459 case INT_FTYPE_INT:
38460 case UINT64_FTYPE_INT:
38461 case UINT16_FTYPE_UINT16:
38462 case INT64_FTYPE_INT64:
38463 case INT64_FTYPE_V4SF:
38464 case INT64_FTYPE_V2DF:
38465 case INT_FTYPE_V16QI:
38466 case INT_FTYPE_V8QI:
38467 case INT_FTYPE_V8SF:
38468 case INT_FTYPE_V4DF:
38469 case INT_FTYPE_V4SF:
38470 case INT_FTYPE_V2DF:
38471 case INT_FTYPE_V32QI:
38472 case V16QI_FTYPE_V16QI:
38473 case V8SI_FTYPE_V8SF:
38474 case V8SI_FTYPE_V4SI:
38475 case V8HI_FTYPE_V8HI:
38476 case V8HI_FTYPE_V16QI:
38477 case V8QI_FTYPE_V8QI:
38478 case V8SF_FTYPE_V8SF:
38479 case V8SF_FTYPE_V8SI:
38480 case V8SF_FTYPE_V4SF:
38481 case V8SF_FTYPE_V8HI:
38482 case V4SI_FTYPE_V4SI:
38483 case V4SI_FTYPE_V16QI:
38484 case V4SI_FTYPE_V4SF:
38485 case V4SI_FTYPE_V8SI:
38486 case V4SI_FTYPE_V8HI:
38487 case V4SI_FTYPE_V4DF:
38488 case V4SI_FTYPE_V2DF:
38489 case V4HI_FTYPE_V4HI:
38490 case V4DF_FTYPE_V4DF:
38491 case V4DF_FTYPE_V4SI:
38492 case V4DF_FTYPE_V4SF:
38493 case V4DF_FTYPE_V2DF:
38494 case V4SF_FTYPE_V4SF:
38495 case V4SF_FTYPE_V4SI:
38496 case V4SF_FTYPE_V8SF:
38497 case V4SF_FTYPE_V4DF:
38498 case V4SF_FTYPE_V8HI:
38499 case V4SF_FTYPE_V2DF:
38500 case V2DI_FTYPE_V2DI:
38501 case V2DI_FTYPE_V16QI:
38502 case V2DI_FTYPE_V8HI:
38503 case V2DI_FTYPE_V4SI:
38504 case V2DF_FTYPE_V2DF:
38505 case V2DF_FTYPE_V4SI:
38506 case V2DF_FTYPE_V4DF:
38507 case V2DF_FTYPE_V4SF:
38508 case V2DF_FTYPE_V2SI:
38509 case V2SI_FTYPE_V2SI:
38510 case V2SI_FTYPE_V4SF:
38511 case V2SI_FTYPE_V2SF:
38512 case V2SI_FTYPE_V2DF:
38513 case V2SF_FTYPE_V2SF:
38514 case V2SF_FTYPE_V2SI:
38515 case V32QI_FTYPE_V32QI:
38516 case V32QI_FTYPE_V16QI:
38517 case V16HI_FTYPE_V16HI:
38518 case V16HI_FTYPE_V8HI:
38519 case V8SI_FTYPE_V8SI:
38520 case V16HI_FTYPE_V16QI:
38521 case V8SI_FTYPE_V16QI:
38522 case V4DI_FTYPE_V16QI:
38523 case V8SI_FTYPE_V8HI:
38524 case V4DI_FTYPE_V8HI:
38525 case V4DI_FTYPE_V4SI:
38526 case V4DI_FTYPE_V2DI:
38527 case UHI_FTYPE_UHI:
38528 case UHI_FTYPE_V16QI:
38529 case USI_FTYPE_V32QI:
38530 case UDI_FTYPE_V64QI:
38531 case V16QI_FTYPE_UHI:
38532 case V32QI_FTYPE_USI:
38533 case V64QI_FTYPE_UDI:
38534 case V8HI_FTYPE_UQI:
38535 case V16HI_FTYPE_UHI:
38536 case V32HI_FTYPE_USI:
38537 case V4SI_FTYPE_UQI:
38538 case V8SI_FTYPE_UQI:
38539 case V4SI_FTYPE_UHI:
38540 case V8SI_FTYPE_UHI:
38541 case UQI_FTYPE_V8HI:
38542 case UHI_FTYPE_V16HI:
38543 case USI_FTYPE_V32HI:
38544 case UQI_FTYPE_V4SI:
38545 case UQI_FTYPE_V8SI:
38546 case UHI_FTYPE_V16SI:
38547 case UQI_FTYPE_V2DI:
38548 case UQI_FTYPE_V4DI:
38549 case UQI_FTYPE_V8DI:
38550 case V16SI_FTYPE_UHI:
38551 case V2DI_FTYPE_UQI:
38552 case V4DI_FTYPE_UQI:
38553 case V16SI_FTYPE_INT:
38554 case V16SF_FTYPE_V8SF:
38555 case V16SI_FTYPE_V8SI:
38556 case V16SF_FTYPE_V4SF:
38557 case V16SI_FTYPE_V4SI:
38558 case V16SF_FTYPE_V16SF:
38559 case V8DI_FTYPE_UQI:
38560 case V8DF_FTYPE_V4DF:
38561 case V8DF_FTYPE_V2DF:
38562 case V8DF_FTYPE_V8DF:
38565 case V4SF_FTYPE_V4SF_VEC_MERGE:
38566 case V2DF_FTYPE_V2DF_VEC_MERGE:
38567 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
38568 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
38569 case V16QI_FTYPE_V16QI_V16QI:
38570 case V16QI_FTYPE_V8HI_V8HI:
38571 case V16SF_FTYPE_V16SF_V16SF:
38572 case V8QI_FTYPE_V8QI_V8QI:
38573 case V8QI_FTYPE_V4HI_V4HI:
38574 case V8HI_FTYPE_V8HI_V8HI:
38575 case V8HI_FTYPE_V16QI_V16QI:
38576 case V8HI_FTYPE_V4SI_V4SI:
38577 case V8SF_FTYPE_V8SF_V8SF:
38578 case V8SF_FTYPE_V8SF_V8SI:
38579 case V8DF_FTYPE_V8DF_V8DF:
38580 case V4SI_FTYPE_V4SI_V4SI:
38581 case V4SI_FTYPE_V8HI_V8HI:
38582 case V4SI_FTYPE_V2DF_V2DF:
38583 case V4HI_FTYPE_V4HI_V4HI:
38584 case V4HI_FTYPE_V8QI_V8QI:
38585 case V4HI_FTYPE_V2SI_V2SI:
38586 case V4DF_FTYPE_V4DF_V4DF:
38587 case V4DF_FTYPE_V4DF_V4DI:
38588 case V4SF_FTYPE_V4SF_V4SF:
38589 case V4SF_FTYPE_V4SF_V4SI:
38590 case V4SF_FTYPE_V4SF_V2SI:
38591 case V4SF_FTYPE_V4SF_V2DF:
38592 case V4SF_FTYPE_V4SF_UINT:
38593 case V4SF_FTYPE_V4SF_DI:
38594 case V4SF_FTYPE_V4SF_SI:
38595 case V2DI_FTYPE_V2DI_V2DI:
38596 case V2DI_FTYPE_V16QI_V16QI:
38597 case V2DI_FTYPE_V4SI_V4SI:
38598 case V2DI_FTYPE_V2DI_V16QI:
38599 case V2SI_FTYPE_V2SI_V2SI:
38600 case V2SI_FTYPE_V4HI_V4HI:
38601 case V2SI_FTYPE_V2SF_V2SF:
38602 case V2DF_FTYPE_V2DF_V2DF:
38603 case V2DF_FTYPE_V2DF_V4SF:
38604 case V2DF_FTYPE_V2DF_V2DI:
38605 case V2DF_FTYPE_V2DF_DI:
38606 case V2DF_FTYPE_V2DF_SI:
38607 case V2DF_FTYPE_V2DF_UINT:
38608 case V2SF_FTYPE_V2SF_V2SF:
38609 case V1DI_FTYPE_V1DI_V1DI:
38610 case V1DI_FTYPE_V8QI_V8QI:
38611 case V1DI_FTYPE_V2SI_V2SI:
38612 case V32QI_FTYPE_V16HI_V16HI:
38613 case V16HI_FTYPE_V8SI_V8SI:
38614 case V32QI_FTYPE_V32QI_V32QI:
38615 case V16HI_FTYPE_V32QI_V32QI:
38616 case V16HI_FTYPE_V16HI_V16HI:
38617 case V8SI_FTYPE_V4DF_V4DF:
38618 case V8SI_FTYPE_V8SI_V8SI:
38619 case V8SI_FTYPE_V16HI_V16HI:
38620 case V4DI_FTYPE_V4DI_V4DI:
38621 case V4DI_FTYPE_V8SI_V8SI:
38622 case V8DI_FTYPE_V64QI_V64QI:
38623 if (comparison == UNKNOWN)
38624 return ix86_expand_binop_builtin (icode, exp, target);
38627 case V4SF_FTYPE_V4SF_V4SF_SWAP:
38628 case V2DF_FTYPE_V2DF_V2DF_SWAP:
38629 gcc_assert (comparison != UNKNOWN);
38633 case V16HI_FTYPE_V16HI_V8HI_COUNT:
38634 case V16HI_FTYPE_V16HI_SI_COUNT:
38635 case V8SI_FTYPE_V8SI_V4SI_COUNT:
38636 case V8SI_FTYPE_V8SI_SI_COUNT:
38637 case V4DI_FTYPE_V4DI_V2DI_COUNT:
38638 case V4DI_FTYPE_V4DI_INT_COUNT:
38639 case V8HI_FTYPE_V8HI_V8HI_COUNT:
38640 case V8HI_FTYPE_V8HI_SI_COUNT:
38641 case V4SI_FTYPE_V4SI_V4SI_COUNT:
38642 case V4SI_FTYPE_V4SI_SI_COUNT:
38643 case V4HI_FTYPE_V4HI_V4HI_COUNT:
38644 case V4HI_FTYPE_V4HI_SI_COUNT:
38645 case V2DI_FTYPE_V2DI_V2DI_COUNT:
38646 case V2DI_FTYPE_V2DI_SI_COUNT:
38647 case V2SI_FTYPE_V2SI_V2SI_COUNT:
38648 case V2SI_FTYPE_V2SI_SI_COUNT:
38649 case V1DI_FTYPE_V1DI_V1DI_COUNT:
38650 case V1DI_FTYPE_V1DI_SI_COUNT:
38652 last_arg_count = true;
38654 case UINT64_FTYPE_UINT64_UINT64:
38655 case UINT_FTYPE_UINT_UINT:
38656 case UINT_FTYPE_UINT_USHORT:
38657 case UINT_FTYPE_UINT_UCHAR:
38658 case UINT16_FTYPE_UINT16_INT:
38659 case UINT8_FTYPE_UINT8_INT:
38660 case UHI_FTYPE_UHI_UHI:
38661 case USI_FTYPE_USI_USI:
38662 case UDI_FTYPE_UDI_UDI:
38663 case V16SI_FTYPE_V8DF_V8DF:
38666 case V2DI_FTYPE_V2DI_INT_CONVERT:
38669 nargs_constant = 1;
38671 case V4DI_FTYPE_V4DI_INT_CONVERT:
38674 nargs_constant = 1;
38676 case V8DI_FTYPE_V8DI_INT_CONVERT:
38679 nargs_constant = 1;
38681 case V8HI_FTYPE_V8HI_INT:
38682 case V8HI_FTYPE_V8SF_INT:
38683 case V16HI_FTYPE_V16SF_INT:
38684 case V8HI_FTYPE_V4SF_INT:
38685 case V8SF_FTYPE_V8SF_INT:
38686 case V4SF_FTYPE_V16SF_INT:
38687 case V16SF_FTYPE_V16SF_INT:
38688 case V4SI_FTYPE_V4SI_INT:
38689 case V4SI_FTYPE_V8SI_INT:
38690 case V4HI_FTYPE_V4HI_INT:
38691 case V4DF_FTYPE_V4DF_INT:
38692 case V4DF_FTYPE_V8DF_INT:
38693 case V4SF_FTYPE_V4SF_INT:
38694 case V4SF_FTYPE_V8SF_INT:
38695 case V2DI_FTYPE_V2DI_INT:
38696 case V2DF_FTYPE_V2DF_INT:
38697 case V2DF_FTYPE_V4DF_INT:
38698 case V16HI_FTYPE_V16HI_INT:
38699 case V8SI_FTYPE_V8SI_INT:
38700 case V16SI_FTYPE_V16SI_INT:
38701 case V4SI_FTYPE_V16SI_INT:
38702 case V4DI_FTYPE_V4DI_INT:
38703 case V2DI_FTYPE_V4DI_INT:
38704 case V4DI_FTYPE_V8DI_INT:
38705 case QI_FTYPE_V4SF_INT:
38706 case QI_FTYPE_V2DF_INT:
38708 nargs_constant = 1;
38710 case V16QI_FTYPE_V16QI_V16QI_V16QI:
38711 case V8SF_FTYPE_V8SF_V8SF_V8SF:
38712 case V4DF_FTYPE_V4DF_V4DF_V4DF:
38713 case V4SF_FTYPE_V4SF_V4SF_V4SF:
38714 case V2DF_FTYPE_V2DF_V2DF_V2DF:
38715 case V32QI_FTYPE_V32QI_V32QI_V32QI:
38716 case UHI_FTYPE_V16SI_V16SI_UHI:
38717 case UQI_FTYPE_V8DI_V8DI_UQI:
38718 case V16HI_FTYPE_V16SI_V16HI_UHI:
38719 case V16QI_FTYPE_V16SI_V16QI_UHI:
38720 case V16QI_FTYPE_V8DI_V16QI_UQI:
38721 case V16SF_FTYPE_V16SF_V16SF_UHI:
38722 case V16SF_FTYPE_V4SF_V16SF_UHI:
38723 case V16SI_FTYPE_SI_V16SI_UHI:
38724 case V16SI_FTYPE_V16HI_V16SI_UHI:
38725 case V16SI_FTYPE_V16QI_V16SI_UHI:
38726 case V8SF_FTYPE_V4SF_V8SF_UQI:
38727 case V4DF_FTYPE_V2DF_V4DF_UQI:
38728 case V8SI_FTYPE_V4SI_V8SI_UQI:
38729 case V8SI_FTYPE_SI_V8SI_UQI:
38730 case V4SI_FTYPE_V4SI_V4SI_UQI:
38731 case V4SI_FTYPE_SI_V4SI_UQI:
38732 case V4DI_FTYPE_V2DI_V4DI_UQI:
38733 case V4DI_FTYPE_DI_V4DI_UQI:
38734 case V2DI_FTYPE_V2DI_V2DI_UQI:
38735 case V2DI_FTYPE_DI_V2DI_UQI:
38736 case V64QI_FTYPE_V64QI_V64QI_UDI:
38737 case V64QI_FTYPE_V16QI_V64QI_UDI:
38738 case V64QI_FTYPE_QI_V64QI_UDI:
38739 case V32QI_FTYPE_V32QI_V32QI_USI:
38740 case V32QI_FTYPE_V16QI_V32QI_USI:
38741 case V32QI_FTYPE_QI_V32QI_USI:
38742 case V16QI_FTYPE_V16QI_V16QI_UHI:
38743 case V16QI_FTYPE_QI_V16QI_UHI:
38744 case V32HI_FTYPE_V8HI_V32HI_USI:
38745 case V32HI_FTYPE_HI_V32HI_USI:
38746 case V16HI_FTYPE_V8HI_V16HI_UHI:
38747 case V16HI_FTYPE_HI_V16HI_UHI:
38748 case V8HI_FTYPE_V8HI_V8HI_UQI:
38749 case V8HI_FTYPE_HI_V8HI_UQI:
38750 case V8SF_FTYPE_V8HI_V8SF_UQI:
38751 case V4SF_FTYPE_V8HI_V4SF_UQI:
38752 case V8SI_FTYPE_V8SF_V8SI_UQI:
38753 case V4SI_FTYPE_V4SF_V4SI_UQI:
38754 case V4DI_FTYPE_V4SF_V4DI_UQI:
38755 case V2DI_FTYPE_V4SF_V2DI_UQI:
38756 case V4SF_FTYPE_V4DI_V4SF_UQI:
38757 case V4SF_FTYPE_V2DI_V4SF_UQI:
38758 case V4DF_FTYPE_V4DI_V4DF_UQI:
38759 case V2DF_FTYPE_V2DI_V2DF_UQI:
38760 case V16QI_FTYPE_V8HI_V16QI_UQI:
38761 case V16QI_FTYPE_V16HI_V16QI_UHI:
38762 case V16QI_FTYPE_V4SI_V16QI_UQI:
38763 case V16QI_FTYPE_V8SI_V16QI_UQI:
38764 case V8HI_FTYPE_V4SI_V8HI_UQI:
38765 case V8HI_FTYPE_V8SI_V8HI_UQI:
38766 case V16QI_FTYPE_V2DI_V16QI_UQI:
38767 case V16QI_FTYPE_V4DI_V16QI_UQI:
38768 case V8HI_FTYPE_V2DI_V8HI_UQI:
38769 case V8HI_FTYPE_V4DI_V8HI_UQI:
38770 case V4SI_FTYPE_V2DI_V4SI_UQI:
38771 case V4SI_FTYPE_V4DI_V4SI_UQI:
38772 case V32QI_FTYPE_V32HI_V32QI_USI:
38773 case UHI_FTYPE_V16QI_V16QI_UHI:
38774 case USI_FTYPE_V32QI_V32QI_USI:
38775 case UDI_FTYPE_V64QI_V64QI_UDI:
38776 case UQI_FTYPE_V8HI_V8HI_UQI:
38777 case UHI_FTYPE_V16HI_V16HI_UHI:
38778 case USI_FTYPE_V32HI_V32HI_USI:
38779 case UQI_FTYPE_V4SI_V4SI_UQI:
38780 case UQI_FTYPE_V8SI_V8SI_UQI:
38781 case UQI_FTYPE_V2DI_V2DI_UQI:
38782 case UQI_FTYPE_V4DI_V4DI_UQI:
38783 case V4SF_FTYPE_V2DF_V4SF_UQI:
38784 case V4SF_FTYPE_V4DF_V4SF_UQI:
38785 case V16SI_FTYPE_V16SI_V16SI_UHI:
38786 case V16SI_FTYPE_V4SI_V16SI_UHI:
38787 case V2DI_FTYPE_V4SI_V2DI_UQI:
38788 case V2DI_FTYPE_V8HI_V2DI_UQI:
38789 case V2DI_FTYPE_V16QI_V2DI_UQI:
38790 case V4DI_FTYPE_V4DI_V4DI_UQI:
38791 case V4DI_FTYPE_V4SI_V4DI_UQI:
38792 case V4DI_FTYPE_V8HI_V4DI_UQI:
38793 case V4DI_FTYPE_V16QI_V4DI_UQI:
38794 case V4DI_FTYPE_V4DF_V4DI_UQI:
38795 case V2DI_FTYPE_V2DF_V2DI_UQI:
38796 case V4SI_FTYPE_V4DF_V4SI_UQI:
38797 case V4SI_FTYPE_V2DF_V4SI_UQI:
38798 case V4SI_FTYPE_V8HI_V4SI_UQI:
38799 case V4SI_FTYPE_V16QI_V4SI_UQI:
38800 case V4DI_FTYPE_V4DI_V4DI_V4DI:
38801 case V8DF_FTYPE_V2DF_V8DF_UQI:
38802 case V8DF_FTYPE_V4DF_V8DF_UQI:
38803 case V8DF_FTYPE_V8DF_V8DF_UQI:
38804 case V8SF_FTYPE_V8SF_V8SF_UQI:
38805 case V8SF_FTYPE_V8SI_V8SF_UQI:
38806 case V4DF_FTYPE_V4DF_V4DF_UQI:
38807 case V4SF_FTYPE_V4SF_V4SF_UQI:
38808 case V2DF_FTYPE_V2DF_V2DF_UQI:
38809 case V2DF_FTYPE_V4SF_V2DF_UQI:
38810 case V2DF_FTYPE_V4SI_V2DF_UQI:
38811 case V4SF_FTYPE_V4SI_V4SF_UQI:
38812 case V4DF_FTYPE_V4SF_V4DF_UQI:
38813 case V4DF_FTYPE_V4SI_V4DF_UQI:
38814 case V8SI_FTYPE_V8SI_V8SI_UQI:
38815 case V8SI_FTYPE_V8HI_V8SI_UQI:
38816 case V8SI_FTYPE_V16QI_V8SI_UQI:
38817 case V8DF_FTYPE_V8SI_V8DF_UQI:
38818 case V8DI_FTYPE_DI_V8DI_UQI:
38819 case V16SF_FTYPE_V8SF_V16SF_UHI:
38820 case V16SI_FTYPE_V8SI_V16SI_UHI:
38821 case V16HI_FTYPE_V16HI_V16HI_UHI:
38822 case V8HI_FTYPE_V16QI_V8HI_UQI:
38823 case V16HI_FTYPE_V16QI_V16HI_UHI:
38824 case V32HI_FTYPE_V32HI_V32HI_USI:
38825 case V32HI_FTYPE_V32QI_V32HI_USI:
38826 case V8DI_FTYPE_V16QI_V8DI_UQI:
38827 case V8DI_FTYPE_V2DI_V8DI_UQI:
38828 case V8DI_FTYPE_V4DI_V8DI_UQI:
38829 case V8DI_FTYPE_V8DI_V8DI_UQI:
38830 case V8DI_FTYPE_V8HI_V8DI_UQI:
38831 case V8DI_FTYPE_V8SI_V8DI_UQI:
38832 case V8HI_FTYPE_V8DI_V8HI_UQI:
38833 case V8SI_FTYPE_V8DI_V8SI_UQI:
38834 case V4SI_FTYPE_V4SI_V4SI_V4SI:
38837 case V32QI_FTYPE_V32QI_V32QI_INT:
38838 case V16HI_FTYPE_V16HI_V16HI_INT:
38839 case V16QI_FTYPE_V16QI_V16QI_INT:
38840 case V4DI_FTYPE_V4DI_V4DI_INT:
38841 case V8HI_FTYPE_V8HI_V8HI_INT:
38842 case V8SI_FTYPE_V8SI_V8SI_INT:
38843 case V8SI_FTYPE_V8SI_V4SI_INT:
38844 case V8SF_FTYPE_V8SF_V8SF_INT:
38845 case V8SF_FTYPE_V8SF_V4SF_INT:
38846 case V4SI_FTYPE_V4SI_V4SI_INT:
38847 case V4DF_FTYPE_V4DF_V4DF_INT:
38848 case V16SF_FTYPE_V16SF_V16SF_INT:
38849 case V16SF_FTYPE_V16SF_V4SF_INT:
38850 case V16SI_FTYPE_V16SI_V4SI_INT:
38851 case V4DF_FTYPE_V4DF_V2DF_INT:
38852 case V4SF_FTYPE_V4SF_V4SF_INT:
38853 case V2DI_FTYPE_V2DI_V2DI_INT:
38854 case V4DI_FTYPE_V4DI_V2DI_INT:
38855 case V2DF_FTYPE_V2DF_V2DF_INT:
38856 case UQI_FTYPE_V8DI_V8UDI_INT:
38857 case UQI_FTYPE_V8DF_V8DF_INT:
38858 case UQI_FTYPE_V2DF_V2DF_INT:
38859 case UQI_FTYPE_V4SF_V4SF_INT:
38860 case UHI_FTYPE_V16SI_V16SI_INT:
38861 case UHI_FTYPE_V16SF_V16SF_INT:
38863 nargs_constant = 1;
38865 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
38868 nargs_constant = 1;
38870 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
38873 nargs_constant = 1;
38875 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
38878 nargs_constant = 1;
38880 case V2DI_FTYPE_V2DI_UINT_UINT:
38882 nargs_constant = 2;
38884 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
38887 nargs_constant = 1;
38889 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT:
38893 nargs_constant = 1;
38895 case QI_FTYPE_V8DF_INT_UQI:
38896 case QI_FTYPE_V4DF_INT_UQI:
38897 case QI_FTYPE_V2DF_INT_UQI:
38898 case HI_FTYPE_V16SF_INT_UHI:
38899 case QI_FTYPE_V8SF_INT_UQI:
38900 case QI_FTYPE_V4SF_INT_UQI:
38903 nargs_constant = 1;
38905 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT:
38909 nargs_constant = 1;
38911 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT:
38915 nargs_constant = 1;
38917 case V32QI_FTYPE_V32QI_V32QI_V32QI_USI:
38918 case V32HI_FTYPE_V32HI_V32HI_V32HI_USI:
38919 case V32HI_FTYPE_V64QI_V64QI_V32HI_USI:
38920 case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI:
38921 case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI:
38922 case V32HI_FTYPE_V32HI_V8HI_V32HI_USI:
38923 case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI:
38924 case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI:
38925 case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI:
38926 case V64QI_FTYPE_V32HI_V32HI_V64QI_UDI:
38927 case V32QI_FTYPE_V16HI_V16HI_V32QI_USI:
38928 case V16QI_FTYPE_V8HI_V8HI_V16QI_UHI:
38929 case V32HI_FTYPE_V16SI_V16SI_V32HI_USI:
38930 case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI:
38931 case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI:
38932 case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI:
38933 case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI:
38934 case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI:
38935 case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI:
38936 case V2DI_FTYPE_V4SI_V4SI_V2DI_UQI:
38937 case V4DI_FTYPE_V8SI_V8SI_V4DI_UQI:
38938 case V4DF_FTYPE_V4DI_V4DF_V4DF_UQI:
38939 case V8SF_FTYPE_V8SI_V8SF_V8SF_UQI:
38940 case V2DF_FTYPE_V2DI_V2DF_V2DF_UQI:
38941 case V4SF_FTYPE_V4SI_V4SF_V4SF_UQI:
38942 case V16SF_FTYPE_V16SF_V16SF_V16SF_UHI:
38943 case V16SF_FTYPE_V16SF_V16SI_V16SF_UHI:
38944 case V16SF_FTYPE_V16SI_V16SF_V16SF_UHI:
38945 case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI:
38946 case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI:
38947 case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI:
38948 case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI:
38949 case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI:
38950 case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI:
38951 case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI:
38952 case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI:
38953 case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI:
38954 case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI:
38955 case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI:
38956 case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI:
38957 case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI:
38958 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI:
38959 case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI:
38960 case V8DF_FTYPE_V8DI_V8DF_V8DF_UQI:
38961 case V8DI_FTYPE_V16SI_V16SI_V8DI_UQI:
38962 case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI:
38963 case V8DI_FTYPE_V8DI_V8DI_V8DI_UQI:
38964 case V8HI_FTYPE_V16QI_V16QI_V8HI_UQI:
38965 case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI:
38966 case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI:
38967 case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI:
38970 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
38971 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
38972 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
38973 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
38974 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
38976 nargs_constant = 1;
38978 case UQI_FTYPE_V4DI_V4DI_INT_UQI:
38979 case UQI_FTYPE_V8SI_V8SI_INT_UQI:
38980 case QI_FTYPE_V4DF_V4DF_INT_UQI:
38981 case QI_FTYPE_V8SF_V8SF_INT_UQI:
38982 case UQI_FTYPE_V2DI_V2DI_INT_UQI:
38983 case UQI_FTYPE_V4SI_V4SI_INT_UQI:
38984 case UQI_FTYPE_V2DF_V2DF_INT_UQI:
38985 case UQI_FTYPE_V4SF_V4SF_INT_UQI:
38986 case UDI_FTYPE_V64QI_V64QI_INT_UDI:
38987 case USI_FTYPE_V32QI_V32QI_INT_USI:
38988 case UHI_FTYPE_V16QI_V16QI_INT_UHI:
38989 case USI_FTYPE_V32HI_V32HI_INT_USI:
38990 case UHI_FTYPE_V16HI_V16HI_INT_UHI:
38991 case UQI_FTYPE_V8HI_V8HI_INT_UQI:
38994 nargs_constant = 1;
38996 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
38998 nargs_constant = 2;
39000 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
39001 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
39004 case UQI_FTYPE_V8DI_V8DI_INT_UQI:
39005 case UHI_FTYPE_V16SI_V16SI_INT_UHI:
39008 nargs_constant = 1;
39010 case V8SF_FTYPE_V8SF_INT_V8SF_UQI:
39011 case V4SF_FTYPE_V4SF_INT_V4SF_UQI:
39012 case V2DF_FTYPE_V4DF_INT_V2DF_UQI:
39013 case V2DI_FTYPE_V4DI_INT_V2DI_UQI:
39014 case V8SF_FTYPE_V16SF_INT_V8SF_UQI:
39015 case V8SI_FTYPE_V16SI_INT_V8SI_UQI:
39016 case V2DF_FTYPE_V8DF_INT_V2DF_UQI:
39017 case V2DI_FTYPE_V8DI_INT_V2DI_UQI:
39018 case V4SF_FTYPE_V8SF_INT_V4SF_UQI:
39019 case V4SI_FTYPE_V8SI_INT_V4SI_UQI:
39020 case V8HI_FTYPE_V8SF_INT_V8HI_UQI:
39021 case V8HI_FTYPE_V4SF_INT_V8HI_UQI:
39022 case V32HI_FTYPE_V32HI_INT_V32HI_USI:
39023 case V16HI_FTYPE_V16HI_INT_V16HI_UHI:
39024 case V8HI_FTYPE_V8HI_INT_V8HI_UQI:
39025 case V4DI_FTYPE_V4DI_INT_V4DI_UQI:
39026 case V2DI_FTYPE_V2DI_INT_V2DI_UQI:
39027 case V8SI_FTYPE_V8SI_INT_V8SI_UQI:
39028 case V4SI_FTYPE_V4SI_INT_V4SI_UQI:
39029 case V4DF_FTYPE_V4DF_INT_V4DF_UQI:
39030 case V2DF_FTYPE_V2DF_INT_V2DF_UQI:
39031 case V8DF_FTYPE_V8DF_INT_V8DF_UQI:
39032 case V16SF_FTYPE_V16SF_INT_V16SF_UHI:
39033 case V16HI_FTYPE_V16SF_INT_V16HI_UHI:
39034 case V16SI_FTYPE_V16SI_INT_V16SI_UHI:
39035 case V4SI_FTYPE_V16SI_INT_V4SI_UQI:
39036 case V4DI_FTYPE_V8DI_INT_V4DI_UQI:
39037 case V4DF_FTYPE_V8DF_INT_V4DF_UQI:
39038 case V4SF_FTYPE_V16SF_INT_V4SF_UQI:
39039 case V8DI_FTYPE_V8DI_INT_V8DI_UQI:
39042 nargs_constant = 1;
39044 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI:
39045 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI:
39046 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI:
39047 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI:
39048 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI:
39049 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI:
39050 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI:
39051 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI:
39052 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI:
39053 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI:
39054 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI:
39055 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI:
39056 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI:
39057 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI:
39058 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI:
39059 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI:
39060 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI:
39061 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI:
39062 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI:
39063 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI:
39064 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI:
39065 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI:
39066 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI:
39067 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI:
39068 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI:
39069 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI:
39070 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI:
39073 nargs_constant = 1;
39075 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI:
39076 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI:
39077 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI:
39078 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI:
39079 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI:
39080 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI:
39081 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI:
39082 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI:
39083 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI:
39084 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI:
39088 nargs_constant = 1;
39092 gcc_unreachable ();
39095 gcc_assert (nargs <= ARRAY_SIZE (args));
39097 if (comparison != UNKNOWN)
39099 gcc_assert (nargs == 2);
39100 return ix86_expand_sse_compare (d, exp, target, swap);
39103 if (rmode == VOIDmode || rmode == tmode)
39107 || GET_MODE (target) != tmode
39108 || !insn_p->operand[0].predicate (target, tmode))
39109 target = gen_reg_rtx (tmode);
39110 real_target = target;
39114 real_target = gen_reg_rtx (tmode);
39115 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
39118 for (i = 0; i < nargs; i++)
39120 tree arg = CALL_EXPR_ARG (exp, i);
39121 rtx op = expand_normal (arg);
39122 machine_mode mode = insn_p->operand[i + 1].mode;
39123 bool match = insn_p->operand[i + 1].predicate (op, mode);
39125 if (last_arg_count && (i + 1) == nargs)
39127 /* SIMD shift insns take either an 8-bit immediate or
39128 register as count. But builtin functions take int as
39129 count. If count doesn't match, we put it in register. */
39132 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
39133 if (!insn_p->operand[i + 1].predicate (op, mode))
39134 op = copy_to_reg (op);
39137 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
39138 (!mask_pos && (nargs - i) <= nargs_constant))
39143 case CODE_FOR_avx_vinsertf128v4di:
39144 case CODE_FOR_avx_vextractf128v4di:
39145 error ("the last argument must be an 1-bit immediate");
39148 case CODE_FOR_avx512f_cmpv8di3_mask:
39149 case CODE_FOR_avx512f_cmpv16si3_mask:
39150 case CODE_FOR_avx512f_ucmpv8di3_mask:
39151 case CODE_FOR_avx512f_ucmpv16si3_mask:
39152 case CODE_FOR_avx512vl_cmpv4di3_mask:
39153 case CODE_FOR_avx512vl_cmpv8si3_mask:
39154 case CODE_FOR_avx512vl_ucmpv4di3_mask:
39155 case CODE_FOR_avx512vl_ucmpv8si3_mask:
39156 case CODE_FOR_avx512vl_cmpv2di3_mask:
39157 case CODE_FOR_avx512vl_cmpv4si3_mask:
39158 case CODE_FOR_avx512vl_ucmpv2di3_mask:
39159 case CODE_FOR_avx512vl_ucmpv4si3_mask:
39160 error ("the last argument must be a 3-bit immediate");
39163 case CODE_FOR_sse4_1_roundsd:
39164 case CODE_FOR_sse4_1_roundss:
39166 case CODE_FOR_sse4_1_roundpd:
39167 case CODE_FOR_sse4_1_roundps:
39168 case CODE_FOR_avx_roundpd256:
39169 case CODE_FOR_avx_roundps256:
39171 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
39172 case CODE_FOR_sse4_1_roundps_sfix:
39173 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
39174 case CODE_FOR_avx_roundps_sfix256:
39176 case CODE_FOR_sse4_1_blendps:
39177 case CODE_FOR_avx_blendpd256:
39178 case CODE_FOR_avx_vpermilv4df:
39179 case CODE_FOR_avx_vpermilv4df_mask:
39180 case CODE_FOR_avx512f_getmantv8df_mask:
39181 case CODE_FOR_avx512f_getmantv16sf_mask:
39182 case CODE_FOR_avx512vl_getmantv8sf_mask:
39183 case CODE_FOR_avx512vl_getmantv4df_mask:
39184 case CODE_FOR_avx512vl_getmantv4sf_mask:
39185 case CODE_FOR_avx512vl_getmantv2df_mask:
39186 case CODE_FOR_avx512dq_rangepv8df_mask_round:
39187 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
39188 case CODE_FOR_avx512dq_rangepv4df_mask:
39189 case CODE_FOR_avx512dq_rangepv8sf_mask:
39190 case CODE_FOR_avx512dq_rangepv2df_mask:
39191 case CODE_FOR_avx512dq_rangepv4sf_mask:
39192 case CODE_FOR_avx_shufpd256_mask:
39193 error ("the last argument must be a 4-bit immediate");
39196 case CODE_FOR_sha1rnds4:
39197 case CODE_FOR_sse4_1_blendpd:
39198 case CODE_FOR_avx_vpermilv2df:
39199 case CODE_FOR_avx_vpermilv2df_mask:
39200 case CODE_FOR_xop_vpermil2v2df3:
39201 case CODE_FOR_xop_vpermil2v4sf3:
39202 case CODE_FOR_xop_vpermil2v4df3:
39203 case CODE_FOR_xop_vpermil2v8sf3:
39204 case CODE_FOR_avx512f_vinsertf32x4_mask:
39205 case CODE_FOR_avx512f_vinserti32x4_mask:
39206 case CODE_FOR_avx512f_vextractf32x4_mask:
39207 case CODE_FOR_avx512f_vextracti32x4_mask:
39208 case CODE_FOR_sse2_shufpd:
39209 case CODE_FOR_sse2_shufpd_mask:
39210 case CODE_FOR_avx512dq_shuf_f64x2_mask:
39211 case CODE_FOR_avx512dq_shuf_i64x2_mask:
39212 case CODE_FOR_avx512vl_shuf_i32x4_mask:
39213 case CODE_FOR_avx512vl_shuf_f32x4_mask:
39214 error ("the last argument must be a 2-bit immediate");
39217 case CODE_FOR_avx_vextractf128v4df:
39218 case CODE_FOR_avx_vextractf128v8sf:
39219 case CODE_FOR_avx_vextractf128v8si:
39220 case CODE_FOR_avx_vinsertf128v4df:
39221 case CODE_FOR_avx_vinsertf128v8sf:
39222 case CODE_FOR_avx_vinsertf128v8si:
39223 case CODE_FOR_avx512f_vinsertf64x4_mask:
39224 case CODE_FOR_avx512f_vinserti64x4_mask:
39225 case CODE_FOR_avx512f_vextractf64x4_mask:
39226 case CODE_FOR_avx512f_vextracti64x4_mask:
39227 case CODE_FOR_avx512dq_vinsertf32x8_mask:
39228 case CODE_FOR_avx512dq_vinserti32x8_mask:
39229 case CODE_FOR_avx512vl_vinsertv4df:
39230 case CODE_FOR_avx512vl_vinsertv4di:
39231 case CODE_FOR_avx512vl_vinsertv8sf:
39232 case CODE_FOR_avx512vl_vinsertv8si:
39233 error ("the last argument must be a 1-bit immediate");
39236 case CODE_FOR_avx_vmcmpv2df3:
39237 case CODE_FOR_avx_vmcmpv4sf3:
39238 case CODE_FOR_avx_cmpv2df3:
39239 case CODE_FOR_avx_cmpv4sf3:
39240 case CODE_FOR_avx_cmpv4df3:
39241 case CODE_FOR_avx_cmpv8sf3:
39242 case CODE_FOR_avx512f_cmpv8df3_mask:
39243 case CODE_FOR_avx512f_cmpv16sf3_mask:
39244 case CODE_FOR_avx512f_vmcmpv2df3_mask:
39245 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
39246 error ("the last argument must be a 5-bit immediate");
39250 switch (nargs_constant)
39253 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
39254 (!mask_pos && (nargs - i) == nargs_constant))
39256 error ("the next to last argument must be an 8-bit immediate");
39260 error ("the last argument must be an 8-bit immediate");
39263 gcc_unreachable ();
39270 if (VECTOR_MODE_P (mode))
39271 op = safe_vector_operand (op, mode);
39273 /* If we aren't optimizing, only allow one memory operand to
39275 if (memory_operand (op, mode))
39278 op = fixup_modeless_constant (op, mode);
39280 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
39282 if (optimize || !match || num_memory > 1)
39283 op = copy_to_mode_reg (mode, op);
39287 op = copy_to_reg (op);
39288 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
39293 args[i].mode = mode;
39299 pat = GEN_FCN (icode) (real_target, args[0].op);
39302 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
39305 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
39309 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
39310 args[2].op, args[3].op);
39313 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
39314 args[2].op, args[3].op, args[4].op);
39316 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
39317 args[2].op, args[3].op, args[4].op,
39321 gcc_unreachable ();
/* NOTE(review): this extract is line-sampled — several statements of the
   original function (the `static rtx` return-type line, braces, the
   declarations of the loop counters, the copy of kept elements into RES,
   and early returns) are missing between the visible lines.  Comments
   below describe only what the visible code establishes.  */
39331 /* Transform pattern of following layout:
39334 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
39342 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
39346 (parallel [ A B ... ]) */
39349 ix86_erase_embedded_rounding (rtx pat)
/* Accept either a full insn or a bare pattern; normalize to the pattern.  */
39351 if (GET_CODE (pat) == INSN)
39352 pat = PATTERN (pat);
39354 gcc_assert (GET_CODE (pat) == PARALLEL);
/* Two-element PARALLEL: must be exactly (SET …) plus the rounding unspec;
   the SET alone is the result (code for that return is in a sampled-out
   line).  */
39356 if (XVECLEN (pat, 0) == 2)
39358 rtx p0 = XVECEXP (pat, 0, 0);
39359 rtx p1 = XVECEXP (pat, 0, 1);
39361 gcc_assert (GET_CODE (p0) == SET
39362 && GET_CODE (p1) == UNSPEC
39363 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
/* General case: copy every element except the UNSPEC_EMBEDDED_ROUNDING
   marker into a scratch vector, then rebuild the PARALLEL from it.
   (The loop counters I and J and the element copy are in sampled-out
   lines; J counts the elements kept.)  */
39369 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
39373 for (; i < XVECLEN (pat, 0); ++i)
39375 rtx elem = XVECEXP (pat, 0, i);
39376 if (GET_CODE (elem) != UNSPEC
39377 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
39381 /* No more than 1 occurence was removed. */
39382 gcc_assert (j >= XVECLEN (pat, 0) - 1);
/* Rebuild the PARALLEL with the J surviving elements.  */
39384 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
39388 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
/* NOTE(review): line-sampled extract — the return type line, braces,
   some declarations (e.g. PAT, SET_DST) and several statements are
   missing between the visible lines.
   Expands a vcomi/vucomi builtin with an explicit rounding (SAE)
   operand: args are (vec0, vec1, comparison-predicate, rounding),
   result is the comparison flag materialized into an SImode register.  */
39391 ix86_expand_sse_comi_round (const struct builtin_description *d,
39392 tree exp, rtx target)
39395 tree arg0 = CALL_EXPR_ARG (exp, 0);
39396 tree arg1 = CALL_EXPR_ARG (exp, 1);
39397 tree arg2 = CALL_EXPR_ARG (exp, 2);
39398 tree arg3 = CALL_EXPR_ARG (exp, 3);
39399 rtx op0 = expand_normal (arg0);
39400 rtx op1 = expand_normal (arg1);
39401 rtx op2 = expand_normal (arg2);
39402 rtx op3 = expand_normal (arg3);
39403 enum insn_code icode = d->icode;
39404 const struct insn_data_d *insn_p = &insn_data[icode];
39405 machine_mode mode0 = insn_p->operand[0].mode;
39406 machine_mode mode1 = insn_p->operand[1].mode;
39407 enum rtx_code comparison = UNEQ;
39408 bool need_ucomi = false;
/* Lookup tables indexed by the _CMP_* predicate immediate (0..31):
   the RTL comparison code to emit, and whether the quiet (ucomi)
   variant is needed for that predicate.  */
39410 /* See avxintrin.h for values. */
39411 enum rtx_code comi_comparisons[32] =
39413 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
39414 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
39415 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
39417 bool need_ucomi_values[32] =
39419 true, false, false, true, true, false, false, true,
39420 true, false, false, true, true, false, false, true,
39421 false, true, true, false, false, true, true, false,
39422 false, true, true, false, false, true, true, false
/* Validate the predicate: must be a compile-time constant in [0, 32).  */
39425 if (!CONST_INT_P (op2))
39427 error ("the third argument must be comparison constant");
39430 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
39432 error ("incorrect comparison mode");
/* Validate the rounding operand against the insn's own predicate.  */
39436 if (!insn_p->operand[2].predicate (op3, SImode))
39438 error ("incorrect rounding operand");
39442 comparison = comi_comparisons[INTVAL (op2)];
39443 need_ucomi = need_ucomi_values[INTVAL (op2)];
39445 if (VECTOR_MODE_P (mode0))
39446 op0 = safe_vector_operand (op0, mode0);
39447 if (VECTOR_MODE_P (mode1))
39448 op1 = safe_vector_operand (op1, mode1);
/* Result is built in a fresh SImode pseudo, zeroed first, and written
   through a QImode SUBREG via STRICT_LOW_PART below; the incoming
   TARGET is deliberately ignored.  */
39450 target = gen_reg_rtx (SImode);
39451 emit_move_insn (target, const0_rtx);
39452 target = gen_rtx_SUBREG (QImode, target, 0);
/* Force operands into registers when the insn predicates demand it.  */
39454 if ((optimize && !register_operand (op0, mode0))
39455 || !insn_p->operand[0].predicate (op0, mode0))
39456 op0 = copy_to_mode_reg (mode0, op0);
39457 if ((optimize && !register_operand (op1, mode1))
39458 || !insn_p->operand[1].predicate (op1, mode1))
39459 op1 = copy_to_mode_reg (mode1, op1);
/* Switch to the quiet (non-signalling) ucomi pattern when the
   predicate table says so; guarded by NEED_UCOMI in a sampled-out
   line.  */
39462 icode = icode == CODE_FOR_sse_comi_round
39463 ? CODE_FOR_sse_ucomi_round
39464 : CODE_FOR_sse2_ucomi_round;
39466 pat = GEN_FCN (icode) (op0, op1, op3);
39470 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
39471 if (INTVAL (op3) == NO_ROUND)
39473 pat = ix86_erase_embedded_rounding (pat);
/* Locate the flags destination: plain SET, or first element of a
   PARALLEL when the rounding marker is still present.  */
39477 set_dst = SET_DEST (pat);
39481 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
39482 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
/* Materialize the condition into the low byte of TARGET without
   clobbering the upper (already-zeroed) bits.  */
39486 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
39487 gen_rtx_fmt_ee (comparison, QImode,
/* Return the full SImode pseudo underlying the QImode subreg.  */
39491 return SUBREG_REG (target);
/* NOTE(review): line-sampled extract — return type, braces, `nargs = N;`
   assignments, `break;` statements and several declarations (pat, args[],
   the real_target handling) are missing between the visible lines.
   Expands an AVX-512 builtin whose last argument is an embedded-rounding /
   SAE immediate.  The big switch maps the builtin's function type to its
   argument count and the number of trailing constant arguments; the loop
   then validates and legitimizes each operand before emitting the insn.  */
39495 ix86_expand_round_builtin (const struct builtin_description *d,
39496 tree exp, rtx target)
39499 unsigned int i, nargs;
39505 enum insn_code icode = d->icode;
39506 const struct insn_data_d *insn_p = &insn_data[icode];
39507 machine_mode tmode = insn_p->operand[0].mode;
/* Number of trailing arguments that must be immediates.  */
39508 unsigned int nargs_constant = 0;
/* Set to 1 when the rounding immediate is NO_ROUND, so the
   UNSPEC_EMBEDDED_ROUNDING marker is stripped from the pattern.  */
39509 unsigned int redundant_embed_rnd = 0;
/* Classify by function type; each group sets NARGS (sampled out here)
   and NARGS_CONSTANT.  */
39511 switch ((enum ix86_builtin_func_type) d->flag)
/* Scalar conversions: (vec, rounding-int).  */
39513 case UINT64_FTYPE_V2DF_INT:
39514 case UINT64_FTYPE_V4SF_INT:
39515 case UINT_FTYPE_V2DF_INT:
39516 case UINT_FTYPE_V4SF_INT:
39517 case INT64_FTYPE_V2DF_INT:
39518 case INT64_FTYPE_V4SF_INT:
39519 case INT_FTYPE_V2DF_INT:
39520 case INT_FTYPE_V4SF_INT:
/* Two data operands plus the rounding immediate.  */
39523 case V4SF_FTYPE_V4SF_UINT_INT:
39524 case V4SF_FTYPE_V4SF_UINT64_INT:
39525 case V2DF_FTYPE_V2DF_UINT64_INT:
39526 case V4SF_FTYPE_V4SF_INT_INT:
39527 case V4SF_FTYPE_V4SF_INT64_INT:
39528 case V2DF_FTYPE_V2DF_INT64_INT:
39529 case V4SF_FTYPE_V4SF_V4SF_INT:
39530 case V2DF_FTYPE_V2DF_V2DF_INT:
39531 case V4SF_FTYPE_V4SF_V2DF_INT:
39532 case V2DF_FTYPE_V2DF_V4SF_INT:
/* Masked unary/binary ops: (src, [src2,] dest-merge, mask, rounding).  */
39535 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
39536 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
39537 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
39538 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
39539 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
39540 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
39541 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
39542 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
39543 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
39544 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
39545 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
39546 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
39547 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
39548 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
/* Two trailing immediates (e.g. an op-selector plus rounding).  */
39551 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
39552 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
39553 nargs_constant = 2;
/* comi/ucomi with rounding is handled by a dedicated expander.  */
39556 case INT_FTYPE_V4SF_V4SF_INT_INT:
39557 case INT_FTYPE_V2DF_V2DF_INT_INT:
39558 return ix86_expand_sse_comi_round (d, exp, target);
39559 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT:
39560 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
39561 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
39562 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
39563 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
39564 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
39567 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
39568 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
39569 nargs_constant = 4;
39572 case UQI_FTYPE_V8DF_V8DF_INT_UQI_INT:
39573 case UQI_FTYPE_V2DF_V2DF_INT_UQI_INT:
39574 case UHI_FTYPE_V16SF_V16SF_INT_UHI_INT:
39575 case UQI_FTYPE_V4SF_V4SF_INT_UQI_INT:
39576 nargs_constant = 3;
39579 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
39580 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
39581 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
39582 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
39584 nargs_constant = 4;
39586 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
39587 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
39588 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
39589 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
39591 nargs_constant = 3;
/* Unhandled function type is a compiler bug, not a user error.  */
39594 gcc_unreachable ();
39596 gcc_assert (nargs <= ARRAY_SIZE (args));
/* Reuse TARGET only if its mode and predicate fit the insn's operand 0.  */
39600 || GET_MODE (target) != tmode
39601 || !insn_p->operand[0].predicate (target, tmode))
39602 target = gen_reg_rtx (tmode);
/* Expand, validate and legitimize each call argument in turn.  */
39604 for (i = 0; i < nargs; i++)
39606 tree arg = CALL_EXPR_ARG (exp, i);
39607 rtx op = expand_normal (arg);
39608 machine_mode mode = insn_p->operand[i + 1].mode;
39609 bool match = insn_p->operand[i + 1].predicate (op, mode);
/* First of the trailing constant args: must satisfy the insn's
   immediate predicate; otherwise report the expected width.  */
39611 if (i == nargs - nargs_constant)
39617 case CODE_FOR_avx512f_getmantv8df_mask_round:
39618 case CODE_FOR_avx512f_getmantv16sf_mask_round:
39619 case CODE_FOR_avx512f_vgetmantv2df_round:
39620 case CODE_FOR_avx512f_vgetmantv4sf_round:
39621 error ("the immediate argument must be a 4-bit immediate");
39623 case CODE_FOR_avx512f_cmpv8df3_mask_round:
39624 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
39625 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
39626 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
39627 error ("the immediate argument must be a 5-bit immediate");
39630 error ("the immediate argument must be an 8-bit immediate");
/* Last argument: the rounding-mode immediate.  */
39635 else if (i == nargs-1)
39637 if (!insn_p->operand[nargs].predicate (op, SImode))
39639 error ("incorrect rounding operand");
39643 /* If there is no rounding use normal version of the pattern. */
39644 if (INTVAL (op) == NO_ROUND)
39645 redundant_embed_rnd = 1;
/* Ordinary data operand: wrap bare vector constants, fix up modeless
   constants, and copy to a register when the predicate rejects it.  */
39649 if (VECTOR_MODE_P (mode))
39650 op = safe_vector_operand (op, mode);
39652 op = fixup_modeless_constant (op, mode);
39654 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
39656 if (optimize || !match)
39657 op = copy_to_mode_reg (mode, op);
/* Mode mismatch: force into a register, then paradoxical-subreg it.  */
39661 op = copy_to_reg (op);
39662 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
39667 args[i].mode = mode;
/* Emit the insn with the right arity.  */
39673 pat = GEN_FCN (icode) (target, args[0].op);
39676 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
39679 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
39683 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
39684 args[2].op, args[3].op);
39687 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
39688 args[2].op, args[3].op, args[4].op);
39690 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
39691 args[2].op, args[3].op, args[4].op,
39695 gcc_unreachable ();
/* NO_ROUND was passed: drop the embedded-rounding marker so the plain
   pattern is used.  */
39701 if (redundant_embed_rnd)
39702 pat = ix86_erase_embedded_rounding (pat);
39708 /* Subroutine of ix86_expand_builtin to take care of special insns
39709 with variable number of operands. */
39712 ix86_expand_special_args_builtin (const struct builtin_description *d,
39713 tree exp, rtx target)
39717 unsigned int i, nargs, arg_adjust, memory;
39718 bool aligned_mem = false;
39724 enum insn_code icode = d->icode;
39725 bool last_arg_constant = false;
39726 const struct insn_data_d *insn_p = &insn_data[icode];
39727 machine_mode tmode = insn_p->operand[0].mode;
39728 enum { load, store } klass;
39730 switch ((enum ix86_builtin_func_type) d->flag)
39732 case VOID_FTYPE_VOID:
39733 emit_insn (GEN_FCN (icode) (target));
39735 case VOID_FTYPE_UINT64:
39736 case VOID_FTYPE_UNSIGNED:
39742 case INT_FTYPE_VOID:
39743 case USHORT_FTYPE_VOID:
39744 case UINT64_FTYPE_VOID:
39745 case UNSIGNED_FTYPE_VOID:
39750 case UINT64_FTYPE_PUNSIGNED:
39751 case V2DI_FTYPE_PV2DI:
39752 case V4DI_FTYPE_PV4DI:
39753 case V32QI_FTYPE_PCCHAR:
39754 case V16QI_FTYPE_PCCHAR:
39755 case V8SF_FTYPE_PCV4SF:
39756 case V8SF_FTYPE_PCFLOAT:
39757 case V4SF_FTYPE_PCFLOAT:
39758 case V4DF_FTYPE_PCV2DF:
39759 case V4DF_FTYPE_PCDOUBLE:
39760 case V2DF_FTYPE_PCDOUBLE:
39761 case VOID_FTYPE_PVOID:
39762 case V8DI_FTYPE_PV8DI:
39768 case CODE_FOR_sse4_1_movntdqa:
39769 case CODE_FOR_avx2_movntdqa:
39770 case CODE_FOR_avx512f_movntdqa:
39771 aligned_mem = true;
39777 case VOID_FTYPE_PV2SF_V4SF:
39778 case VOID_FTYPE_PV8DI_V8DI:
39779 case VOID_FTYPE_PV4DI_V4DI:
39780 case VOID_FTYPE_PV2DI_V2DI:
39781 case VOID_FTYPE_PCHAR_V32QI:
39782 case VOID_FTYPE_PCHAR_V16QI:
39783 case VOID_FTYPE_PFLOAT_V16SF:
39784 case VOID_FTYPE_PFLOAT_V8SF:
39785 case VOID_FTYPE_PFLOAT_V4SF:
39786 case VOID_FTYPE_PDOUBLE_V8DF:
39787 case VOID_FTYPE_PDOUBLE_V4DF:
39788 case VOID_FTYPE_PDOUBLE_V2DF:
39789 case VOID_FTYPE_PLONGLONG_LONGLONG:
39790 case VOID_FTYPE_PULONGLONG_ULONGLONG:
39791 case VOID_FTYPE_PINT_INT:
39794 /* Reserve memory operand for target. */
39795 memory = ARRAY_SIZE (args);
39798 /* These builtins and instructions require the memory
39799 to be properly aligned. */
39800 case CODE_FOR_avx_movntv4di:
39801 case CODE_FOR_sse2_movntv2di:
39802 case CODE_FOR_avx_movntv8sf:
39803 case CODE_FOR_sse_movntv4sf:
39804 case CODE_FOR_sse4a_vmmovntv4sf:
39805 case CODE_FOR_avx_movntv4df:
39806 case CODE_FOR_sse2_movntv2df:
39807 case CODE_FOR_sse4a_vmmovntv2df:
39808 case CODE_FOR_sse2_movntidi:
39809 case CODE_FOR_sse_movntq:
39810 case CODE_FOR_sse2_movntisi:
39811 case CODE_FOR_avx512f_movntv16sf:
39812 case CODE_FOR_avx512f_movntv8df:
39813 case CODE_FOR_avx512f_movntv8di:
39814 aligned_mem = true;
39820 case V4SF_FTYPE_V4SF_PCV2SF:
39821 case V2DF_FTYPE_V2DF_PCDOUBLE:
39826 case V8SF_FTYPE_PCV8SF_V8SI:
39827 case V4DF_FTYPE_PCV4DF_V4DI:
39828 case V4SF_FTYPE_PCV4SF_V4SI:
39829 case V2DF_FTYPE_PCV2DF_V2DI:
39830 case V8SI_FTYPE_PCV8SI_V8SI:
39831 case V4DI_FTYPE_PCV4DI_V4DI:
39832 case V4SI_FTYPE_PCV4SI_V4SI:
39833 case V2DI_FTYPE_PCV2DI_V2DI:
39838 case VOID_FTYPE_PV8DF_V8DF_UQI:
39839 case VOID_FTYPE_PV4DF_V4DF_UQI:
39840 case VOID_FTYPE_PV2DF_V2DF_UQI:
39841 case VOID_FTYPE_PV16SF_V16SF_UHI:
39842 case VOID_FTYPE_PV8SF_V8SF_UQI:
39843 case VOID_FTYPE_PV4SF_V4SF_UQI:
39844 case VOID_FTYPE_PV8DI_V8DI_UQI:
39845 case VOID_FTYPE_PV4DI_V4DI_UQI:
39846 case VOID_FTYPE_PV2DI_V2DI_UQI:
39847 case VOID_FTYPE_PV16SI_V16SI_UHI:
39848 case VOID_FTYPE_PV8SI_V8SI_UQI:
39849 case VOID_FTYPE_PV4SI_V4SI_UQI:
39852 /* These builtins and instructions require the memory
39853 to be properly aligned. */
39854 case CODE_FOR_avx512f_storev16sf_mask:
39855 case CODE_FOR_avx512f_storev16si_mask:
39856 case CODE_FOR_avx512f_storev8df_mask:
39857 case CODE_FOR_avx512f_storev8di_mask:
39858 case CODE_FOR_avx512vl_storev8sf_mask:
39859 case CODE_FOR_avx512vl_storev8si_mask:
39860 case CODE_FOR_avx512vl_storev4df_mask:
39861 case CODE_FOR_avx512vl_storev4di_mask:
39862 case CODE_FOR_avx512vl_storev4sf_mask:
39863 case CODE_FOR_avx512vl_storev4si_mask:
39864 case CODE_FOR_avx512vl_storev2df_mask:
39865 case CODE_FOR_avx512vl_storev2di_mask:
39866 aligned_mem = true;
39872 case VOID_FTYPE_PV8SF_V8SI_V8SF:
39873 case VOID_FTYPE_PV4DF_V4DI_V4DF:
39874 case VOID_FTYPE_PV4SF_V4SI_V4SF:
39875 case VOID_FTYPE_PV2DF_V2DI_V2DF:
39876 case VOID_FTYPE_PV8SI_V8SI_V8SI:
39877 case VOID_FTYPE_PV4DI_V4DI_V4DI:
39878 case VOID_FTYPE_PV4SI_V4SI_V4SI:
39879 case VOID_FTYPE_PV2DI_V2DI_V2DI:
39880 case VOID_FTYPE_PV8SI_V8DI_UQI:
39881 case VOID_FTYPE_PV8HI_V8DI_UQI:
39882 case VOID_FTYPE_PV16HI_V16SI_UHI:
39883 case VOID_FTYPE_PV16QI_V8DI_UQI:
39884 case VOID_FTYPE_PV16QI_V16SI_UHI:
39885 case VOID_FTYPE_PV4SI_V4DI_UQI:
39886 case VOID_FTYPE_PV4SI_V2DI_UQI:
39887 case VOID_FTYPE_PV8HI_V4DI_UQI:
39888 case VOID_FTYPE_PV8HI_V2DI_UQI:
39889 case VOID_FTYPE_PV8HI_V8SI_UQI:
39890 case VOID_FTYPE_PV8HI_V4SI_UQI:
39891 case VOID_FTYPE_PV16QI_V4DI_UQI:
39892 case VOID_FTYPE_PV16QI_V2DI_UQI:
39893 case VOID_FTYPE_PV16QI_V8SI_UQI:
39894 case VOID_FTYPE_PV16QI_V4SI_UQI:
39895 case VOID_FTYPE_PV8HI_V8HI_UQI:
39896 case VOID_FTYPE_PV16HI_V16HI_UHI:
39897 case VOID_FTYPE_PV32HI_V32HI_USI:
39898 case VOID_FTYPE_PV16QI_V16QI_UHI:
39899 case VOID_FTYPE_PV32QI_V32QI_USI:
39900 case VOID_FTYPE_PV64QI_V64QI_UDI:
39903 /* Reserve memory operand for target. */
39904 memory = ARRAY_SIZE (args);
39906 case V4SF_FTYPE_PCV4SF_V4SF_UQI:
39907 case V8SF_FTYPE_PCV8SF_V8SF_UQI:
39908 case V16SF_FTYPE_PCV16SF_V16SF_UHI:
39909 case V4SI_FTYPE_PCV4SI_V4SI_UQI:
39910 case V8SI_FTYPE_PCV8SI_V8SI_UQI:
39911 case V16SI_FTYPE_PCV16SI_V16SI_UHI:
39912 case V2DF_FTYPE_PCV2DF_V2DF_UQI:
39913 case V4DF_FTYPE_PCV4DF_V4DF_UQI:
39914 case V8DF_FTYPE_PCV8DF_V8DF_UQI:
39915 case V2DI_FTYPE_PCV2DI_V2DI_UQI:
39916 case V4DI_FTYPE_PCV4DI_V4DI_UQI:
39917 case V8DI_FTYPE_PCV8DI_V8DI_UQI:
39918 case V8HI_FTYPE_PCV8HI_V8HI_UQI:
39919 case V16HI_FTYPE_PCV16HI_V16HI_UHI:
39920 case V32HI_FTYPE_PCV32HI_V32HI_USI:
39921 case V16QI_FTYPE_PCV16QI_V16QI_UHI:
39922 case V32QI_FTYPE_PCV32QI_V32QI_USI:
39923 case V64QI_FTYPE_PCV64QI_V64QI_UDI:
39929 /* These builtins and instructions require the memory
39930 to be properly aligned. */
39931 case CODE_FOR_avx512f_loadv16sf_mask:
39932 case CODE_FOR_avx512f_loadv16si_mask:
39933 case CODE_FOR_avx512f_loadv8df_mask:
39934 case CODE_FOR_avx512f_loadv8di_mask:
39935 case CODE_FOR_avx512vl_loadv8sf_mask:
39936 case CODE_FOR_avx512vl_loadv8si_mask:
39937 case CODE_FOR_avx512vl_loadv4df_mask:
39938 case CODE_FOR_avx512vl_loadv4di_mask:
39939 case CODE_FOR_avx512vl_loadv4sf_mask:
39940 case CODE_FOR_avx512vl_loadv4si_mask:
39941 case CODE_FOR_avx512vl_loadv2df_mask:
39942 case CODE_FOR_avx512vl_loadv2di_mask:
39943 case CODE_FOR_avx512bw_loadv64qi_mask:
39944 case CODE_FOR_avx512vl_loadv32qi_mask:
39945 case CODE_FOR_avx512vl_loadv16qi_mask:
39946 case CODE_FOR_avx512bw_loadv32hi_mask:
39947 case CODE_FOR_avx512vl_loadv16hi_mask:
39948 case CODE_FOR_avx512vl_loadv8hi_mask:
39949 aligned_mem = true;
39955 case VOID_FTYPE_UINT_UINT_UINT:
39956 case VOID_FTYPE_UINT64_UINT_UINT:
39957 case UCHAR_FTYPE_UINT_UINT_UINT:
39958 case UCHAR_FTYPE_UINT64_UINT_UINT:
39961 memory = ARRAY_SIZE (args);
39962 last_arg_constant = true;
39965 gcc_unreachable ();
39968 gcc_assert (nargs <= ARRAY_SIZE (args));
39970 if (klass == store)
39972 arg = CALL_EXPR_ARG (exp, 0);
39973 op = expand_normal (arg);
39974 gcc_assert (target == 0);
39977 op = ix86_zero_extend_to_Pmode (op);
39978 target = gen_rtx_MEM (tmode, op);
39979 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
39980 on it. Try to improve it using get_pointer_alignment,
39981 and if the special builtin is one that requires strict
39982 mode alignment, also from it's GET_MODE_ALIGNMENT.
39983 Failure to do so could lead to ix86_legitimate_combined_insn
39984 rejecting all changes to such insns. */
39985 unsigned int align = get_pointer_alignment (arg);
39986 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
39987 align = GET_MODE_ALIGNMENT (tmode);
39988 if (MEM_ALIGN (target) < align)
39989 set_mem_align (target, align);
39992 target = force_reg (tmode, op);
40000 || !register_operand (target, tmode)
40001 || GET_MODE (target) != tmode)
40002 target = gen_reg_rtx (tmode);
40005 for (i = 0; i < nargs; i++)
40007 machine_mode mode = insn_p->operand[i + 1].mode;
40010 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
40011 op = expand_normal (arg);
40012 match = insn_p->operand[i + 1].predicate (op, mode);
40014 if (last_arg_constant && (i + 1) == nargs)
40018 if (icode == CODE_FOR_lwp_lwpvalsi3
40019 || icode == CODE_FOR_lwp_lwpinssi3
40020 || icode == CODE_FOR_lwp_lwpvaldi3
40021 || icode == CODE_FOR_lwp_lwpinsdi3)
40022 error ("the last argument must be a 32-bit immediate");
40024 error ("the last argument must be an 8-bit immediate");
40032 /* This must be the memory operand. */
40033 op = ix86_zero_extend_to_Pmode (op);
40034 op = gen_rtx_MEM (mode, op);
40035 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
40036 on it. Try to improve it using get_pointer_alignment,
40037 and if the special builtin is one that requires strict
40038 mode alignment, also from it's GET_MODE_ALIGNMENT.
40039 Failure to do so could lead to ix86_legitimate_combined_insn
40040 rejecting all changes to such insns. */
40041 unsigned int align = get_pointer_alignment (arg);
40042 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
40043 align = GET_MODE_ALIGNMENT (mode);
40044 if (MEM_ALIGN (op) < align)
40045 set_mem_align (op, align);
40049 /* This must be register. */
40050 if (VECTOR_MODE_P (mode))
40051 op = safe_vector_operand (op, mode);
40053 op = fixup_modeless_constant (op, mode);
40055 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
40056 op = copy_to_mode_reg (mode, op);
40059 op = copy_to_reg (op);
40060 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
40066 args[i].mode = mode;
40072 pat = GEN_FCN (icode) (target);
40075 pat = GEN_FCN (icode) (target, args[0].op);
40078 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
40081 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
40084 gcc_unreachable ();
40090 return klass == store ? 0 : target;
40093 /* Return the integer constant in ARG. Constrain it to be in the range
40094 of the subparts of VEC_TYPE; issue an error if not. */
/* Helper for the vec_ext/vec_set builtin expanders below: validates that
   the element-selector argument is a compile-time constant within the
   vector's lane count.  NOTE(review): this dump is elided -- the opening
   brace, error-path return and final return of ELT are not visible here.  */
40097 get_element_number (tree vec_type, tree arg)
/* MAX is the highest valid lane index, i.e. number of subparts minus 1.  */
40099 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* Reject a selector that is not an unsigned host-wide-int constant, or
   one that exceeds the last valid lane index.  */
40101 if (!tree_fits_uhwi_p (arg)
40102 || (elt = tree_to_uhwi (arg), elt > max))
40104 error ("selector must be an integer constant in the range 0..%wi", max);
40111 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
40112 ix86_expand_vector_init. We DO have language-level syntax for this, in
40113 the form of (type){ init-list }. Except that since we can't place emms
40114 instructions from inside the compiler, we can't allow the use of MMX
40115 registers unless the user explicitly asks for it. So we do *not* define
40116 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
40117 we have builtins invoked by mmintrin.h that gives us license to emit
40118 these sorts of instructions. */
/* NOTE(review): elided dump -- the return-type line, braces and the final
   return of TARGET are not visible in this view.  */
40121 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
40123 machine_mode tmode = TYPE_MODE (type);
40124 machine_mode inner_mode = GET_MODE_INNER (tmode);
40125 int i, n_elt = GET_MODE_NUNITS (tmode);
/* One rtvec slot per vector lane; the call must supply exactly one
   argument per lane (asserted below).  */
40126 rtvec v = rtvec_alloc (n_elt);
40128 gcc_assert (VECTOR_MODE_P (tmode));
40129 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand each call argument and narrow it to the vector's element mode.  */
40131 for (i = 0; i < n_elt; ++i)
40133 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
40134 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
/* Use a fresh register unless TARGET is already a suitable register
   operand of the vector mode.  */
40137 if (!target || !register_operand (target, tmode))
40138 target = gen_reg_rtx (tmode);
/* Emit the actual vector-initialization sequence from the PARALLEL.  */
40140 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
40144 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
40145 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
40146 had a language-level syntax for referencing vector elements. */
/* NOTE(review): elided dump -- return-type line, braces, local declarations
   for arg0/arg1/op0/elt and the final return of TARGET are not visible.  */
40149 ix86_expand_vec_ext_builtin (tree exp, rtx target)
40151 machine_mode tmode, mode0;
/* arg0 is the vector, arg1 the constant lane selector.  */
40156 arg0 = CALL_EXPR_ARG (exp, 0);
40157 arg1 = CALL_EXPR_ARG (exp, 1);
40159 op0 = expand_normal (arg0);
/* Validate the selector against the vector's lane count (errors if not
   a suitable constant).  */
40160 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* TMODE is the element mode, MODE0 the whole-vector mode.  */
40162 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
40163 mode0 = TYPE_MODE (TREE_TYPE (arg0));
40164 gcc_assert (VECTOR_MODE_P (mode0));
40166 op0 = force_reg (mode0, op0);
/* When optimizing, always use a fresh pseudo for the result.  */
40168 if (optimize || !target || !register_operand (target, tmode))
40169 target = gen_reg_rtx (tmode);
40171 ix86_expand_vector_extract (true, target, op0, elt);
40176 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
40177 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
40178 a language-level syntax for referencing vector elements. */
/* NOTE(review): elided dump -- return-type line, braces, the ELT
   declaration and the final return of TARGET are not visible here.  */
40181 ix86_expand_vec_set_builtin (tree exp)
40183 machine_mode tmode, mode1;
40184 tree arg0, arg1, arg2;
40186 rtx op0, op1, target;
/* arg0: source vector; arg1: new element value; arg2: lane selector.  */
40188 arg0 = CALL_EXPR_ARG (exp, 0);
40189 arg1 = CALL_EXPR_ARG (exp, 1);
40190 arg2 = CALL_EXPR_ARG (exp, 2);
/* TMODE is the whole-vector mode, MODE1 the element mode.  */
40192 tmode = TYPE_MODE (TREE_TYPE (arg0));
40193 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
40194 gcc_assert (VECTOR_MODE_P (tmode));
40196 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
40197 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
/* Validate the lane selector (issues an error for bad constants).  */
40198 elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Convert the element value to the element mode if needed (unsigned
   conversion).  */
40200 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
40201 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
40203 op0 = force_reg (tmode, op0);
40204 op1 = force_reg (mode1, op1);
40206 /* OP0 is the source of these builtin functions and shouldn't be
40207 modified. Create a copy, use it and return it as target. */
40208 target = gen_reg_rtx (tmode);
40209 emit_move_insn (target, op0);
40210 ix86_expand_vector_set (true, target, op1, elt);
40215 /* Emit conditional move of SRC to DST with condition
/* NOTE(review): elided dump -- the second comment line, the function's
   return-type line, braces, and the condition that chooses between the
   two emission strategies below are not visible in this view.  */
40218 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
/* Strategy 1: emit a real conditional move -- SET DST to an
   IF_THEN_ELSE on the comparison result T.  Presumably guarded by a
   cmove-availability check on the elided lines -- TODO confirm.  */
40224 t = ix86_expand_compare (code, op1, op2);
40225 emit_insn (gen_rtx_SET (dst, gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
/* Strategy 2: no cmove -- branch around a plain move using the
   reversed condition, so DST = SRC only when the condition holds.  */
40230 rtx_code_label *nomove = gen_label_rtx ();
40231 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
40232 const0_rtx, GET_MODE (op1), 1, nomove);
40233 emit_move_insn (dst, src);
40234 emit_label (nomove);
40238 /* Choose max of DST and SRC and put it to DST. */
/* NOTE(review): elided dump -- return-type line and braces not visible.  */
40240 ix86_emit_move_max (rtx dst, rtx src)
/* Unsigned max via conditional move: copy SRC into DST when DST < SRC
   (LTU comparison of DST against SRC).  */
40242 ix86_emit_cmove (dst, src, LTU, dst, src);
40245 /* Expand an expression EXP that calls a built-in function,
40246 with result going to TARGET if that's convenient
40247 (and in mode MODE if that's convenient).
40248 SUBTARGET may be used as the target for computing one of EXP's operands.
40249 IGNORE is nonzero if the value is to be ignored. */
40252 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
40253 machine_mode mode, int ignore)
40255 const struct builtin_description *d;
40257 enum insn_code icode;
40258 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
40259 tree arg0, arg1, arg2, arg3, arg4;
40260 rtx op0, op1, op2, op3, op4, pat, insn;
40261 machine_mode mode0, mode1, mode2, mode3, mode4;
40262 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
40264 /* For CPU builtins that can be folded, fold first and expand the fold. */
40267 case IX86_BUILTIN_CPU_INIT:
40269 /* Make it call __cpu_indicator_init in libgcc. */
40270 tree call_expr, fndecl, type;
40271 type = build_function_type_list (integer_type_node, NULL_TREE);
40272 fndecl = build_fn_decl ("__cpu_indicator_init", type);
40273 call_expr = build_call_expr (fndecl, 0);
40274 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
40276 case IX86_BUILTIN_CPU_IS:
40277 case IX86_BUILTIN_CPU_SUPPORTS:
40279 tree arg0 = CALL_EXPR_ARG (exp, 0);
40280 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
40281 gcc_assert (fold_expr != NULL_TREE);
40282 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
40286 /* Determine whether the builtin function is available under the current ISA.
40287 Originally the builtin was not created if it wasn't applicable to the
40288 current ISA based on the command line switches. With function specific
40289 options, we need to check in the context of the function making the call
40290 whether it is supported. */
40291 if (ix86_builtins_isa[fcode].isa
40292 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
40294 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
40295 NULL, (enum fpmath_unit) 0, false);
40298 error ("%qE needs unknown isa option", fndecl);
40301 gcc_assert (opts != NULL);
40302 error ("%qE needs isa option %s", fndecl, opts);
40310 case IX86_BUILTIN_BNDMK:
40312 || GET_MODE (target) != BNDmode
40313 || !register_operand (target, BNDmode))
40314 target = gen_reg_rtx (BNDmode);
40316 arg0 = CALL_EXPR_ARG (exp, 0);
40317 arg1 = CALL_EXPR_ARG (exp, 1);
40319 op0 = expand_normal (arg0);
40320 op1 = expand_normal (arg1);
40322 if (!register_operand (op0, Pmode))
40323 op0 = ix86_zero_extend_to_Pmode (op0);
40324 if (!register_operand (op1, Pmode))
40325 op1 = ix86_zero_extend_to_Pmode (op1);
40327 /* Builtin arg1 is size of block but instruction op1 should
40329 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
40330 NULL_RTX, 1, OPTAB_DIRECT);
40332 emit_insn (BNDmode == BND64mode
40333 ? gen_bnd64_mk (target, op0, op1)
40334 : gen_bnd32_mk (target, op0, op1));
40337 case IX86_BUILTIN_BNDSTX:
40338 arg0 = CALL_EXPR_ARG (exp, 0);
40339 arg1 = CALL_EXPR_ARG (exp, 1);
40340 arg2 = CALL_EXPR_ARG (exp, 2);
40342 op0 = expand_normal (arg0);
40343 op1 = expand_normal (arg1);
40344 op2 = expand_normal (arg2);
40346 if (!register_operand (op0, Pmode))
40347 op0 = ix86_zero_extend_to_Pmode (op0);
40348 if (!register_operand (op1, BNDmode))
40349 op1 = copy_to_mode_reg (BNDmode, op1);
40350 if (!register_operand (op2, Pmode))
40351 op2 = ix86_zero_extend_to_Pmode (op2);
40353 emit_insn (BNDmode == BND64mode
40354 ? gen_bnd64_stx (op2, op0, op1)
40355 : gen_bnd32_stx (op2, op0, op1));
40358 case IX86_BUILTIN_BNDLDX:
40360 || GET_MODE (target) != BNDmode
40361 || !register_operand (target, BNDmode))
40362 target = gen_reg_rtx (BNDmode);
40364 arg0 = CALL_EXPR_ARG (exp, 0);
40365 arg1 = CALL_EXPR_ARG (exp, 1);
40367 op0 = expand_normal (arg0);
40368 op1 = expand_normal (arg1);
40370 if (!register_operand (op0, Pmode))
40371 op0 = ix86_zero_extend_to_Pmode (op0);
40372 if (!register_operand (op1, Pmode))
40373 op1 = ix86_zero_extend_to_Pmode (op1);
40375 emit_insn (BNDmode == BND64mode
40376 ? gen_bnd64_ldx (target, op0, op1)
40377 : gen_bnd32_ldx (target, op0, op1));
40380 case IX86_BUILTIN_BNDCL:
40381 arg0 = CALL_EXPR_ARG (exp, 0);
40382 arg1 = CALL_EXPR_ARG (exp, 1);
40384 op0 = expand_normal (arg0);
40385 op1 = expand_normal (arg1);
40387 if (!register_operand (op0, Pmode))
40388 op0 = ix86_zero_extend_to_Pmode (op0);
40389 if (!register_operand (op1, BNDmode))
40390 op1 = copy_to_mode_reg (BNDmode, op1);
40392 emit_insn (BNDmode == BND64mode
40393 ? gen_bnd64_cl (op1, op0)
40394 : gen_bnd32_cl (op1, op0));
40397 case IX86_BUILTIN_BNDCU:
40398 arg0 = CALL_EXPR_ARG (exp, 0);
40399 arg1 = CALL_EXPR_ARG (exp, 1);
40401 op0 = expand_normal (arg0);
40402 op1 = expand_normal (arg1);
40404 if (!register_operand (op0, Pmode))
40405 op0 = ix86_zero_extend_to_Pmode (op0);
40406 if (!register_operand (op1, BNDmode))
40407 op1 = copy_to_mode_reg (BNDmode, op1);
40409 emit_insn (BNDmode == BND64mode
40410 ? gen_bnd64_cu (op1, op0)
40411 : gen_bnd32_cu (op1, op0));
40414 case IX86_BUILTIN_BNDRET:
40415 arg0 = CALL_EXPR_ARG (exp, 0);
40416 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
40417 target = chkp_get_rtl_bounds (arg0);
40419 /* If no bounds were specified for returned value,
40420 then use INIT bounds. It usually happens when
40421 some built-in function is expanded. */
40424 rtx t1 = gen_reg_rtx (Pmode);
40425 rtx t2 = gen_reg_rtx (Pmode);
40426 target = gen_reg_rtx (BNDmode);
40427 emit_move_insn (t1, const0_rtx);
40428 emit_move_insn (t2, constm1_rtx);
40429 emit_insn (BNDmode == BND64mode
40430 ? gen_bnd64_mk (target, t1, t2)
40431 : gen_bnd32_mk (target, t1, t2));
40434 gcc_assert (target && REG_P (target));
40437 case IX86_BUILTIN_BNDNARROW:
40439 rtx m1, m1h1, m1h2, lb, ub, t1;
40441 /* Return value and lb. */
40442 arg0 = CALL_EXPR_ARG (exp, 0);
40444 arg1 = CALL_EXPR_ARG (exp, 1);
40446 arg2 = CALL_EXPR_ARG (exp, 2);
40448 lb = expand_normal (arg0);
40449 op1 = expand_normal (arg1);
40450 op2 = expand_normal (arg2);
40452 /* Size was passed but we need to use (size - 1) as for bndmk. */
40453 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
40454 NULL_RTX, 1, OPTAB_DIRECT);
40456 /* Add LB to size and inverse to get UB. */
40457 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
40458 op2, 1, OPTAB_DIRECT);
40459 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
40461 if (!register_operand (lb, Pmode))
40462 lb = ix86_zero_extend_to_Pmode (lb);
40463 if (!register_operand (ub, Pmode))
40464 ub = ix86_zero_extend_to_Pmode (ub);
40466 /* We need to move bounds to memory before any computations. */
40471 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
40472 emit_move_insn (m1, op1);
40475 /* Generate mem expression to be used for access to LB and UB. */
40476 m1h1 = adjust_address (m1, Pmode, 0);
40477 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
40479 t1 = gen_reg_rtx (Pmode);
40482 emit_move_insn (t1, m1h1);
40483 ix86_emit_move_max (t1, lb);
40484 emit_move_insn (m1h1, t1);
40486 /* Compute UB. UB is stored in 1's complement form. Therefore
40487 we also use max here. */
40488 emit_move_insn (t1, m1h2);
40489 ix86_emit_move_max (t1, ub);
40490 emit_move_insn (m1h2, t1);
40492 op2 = gen_reg_rtx (BNDmode);
40493 emit_move_insn (op2, m1);
40495 return chkp_join_splitted_slot (lb, op2);
40498 case IX86_BUILTIN_BNDINT:
40500 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
40503 || GET_MODE (target) != BNDmode
40504 || !register_operand (target, BNDmode))
40505 target = gen_reg_rtx (BNDmode);
40507 arg0 = CALL_EXPR_ARG (exp, 0);
40508 arg1 = CALL_EXPR_ARG (exp, 1);
40510 op0 = expand_normal (arg0);
40511 op1 = expand_normal (arg1);
40513 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
40514 rh1 = adjust_address (res, Pmode, 0);
40515 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
40517 /* Put first bounds to temporaries. */
40518 lb1 = gen_reg_rtx (Pmode);
40519 ub1 = gen_reg_rtx (Pmode);
40522 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
40523 emit_move_insn (ub1, adjust_address (op0, Pmode,
40524 GET_MODE_SIZE (Pmode)));
40528 emit_move_insn (res, op0);
40529 emit_move_insn (lb1, rh1);
40530 emit_move_insn (ub1, rh2);
40533 /* Put second bounds to temporaries. */
40534 lb2 = gen_reg_rtx (Pmode);
40535 ub2 = gen_reg_rtx (Pmode);
40538 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
40539 emit_move_insn (ub2, adjust_address (op1, Pmode,
40540 GET_MODE_SIZE (Pmode)));
40544 emit_move_insn (res, op1);
40545 emit_move_insn (lb2, rh1);
40546 emit_move_insn (ub2, rh2);
40550 ix86_emit_move_max (lb1, lb2);
40551 emit_move_insn (rh1, lb1);
40553 /* Compute UB. UB is stored in 1's complement form. Therefore
40554 we also use max here. */
40555 ix86_emit_move_max (ub1, ub2);
40556 emit_move_insn (rh2, ub1);
40558 emit_move_insn (target, res);
40563 case IX86_BUILTIN_SIZEOF:
40569 || GET_MODE (target) != Pmode
40570 || !register_operand (target, Pmode))
40571 target = gen_reg_rtx (Pmode);
40573 arg0 = CALL_EXPR_ARG (exp, 0);
40574 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
40576 name = DECL_ASSEMBLER_NAME (arg0);
40577 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
40579 emit_insn (Pmode == SImode
40580 ? gen_move_size_reloc_si (target, symbol)
40581 : gen_move_size_reloc_di (target, symbol));
40586 case IX86_BUILTIN_BNDLOWER:
40591 || GET_MODE (target) != Pmode
40592 || !register_operand (target, Pmode))
40593 target = gen_reg_rtx (Pmode);
40595 arg0 = CALL_EXPR_ARG (exp, 0);
40596 op0 = expand_normal (arg0);
40598 /* We need to move bounds to memory first. */
40603 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
40604 emit_move_insn (mem, op0);
40607 /* Generate mem expression to access LB and load it. */
40608 hmem = adjust_address (mem, Pmode, 0);
40609 emit_move_insn (target, hmem);
40614 case IX86_BUILTIN_BNDUPPER:
40616 rtx mem, hmem, res;
40619 || GET_MODE (target) != Pmode
40620 || !register_operand (target, Pmode))
40621 target = gen_reg_rtx (Pmode);
40623 arg0 = CALL_EXPR_ARG (exp, 0);
40624 op0 = expand_normal (arg0);
40626 /* We need to move bounds to memory first. */
40631 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
40632 emit_move_insn (mem, op0);
40635 /* Generate mem expression to access UB. */
40636 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
40638 /* We need to inverse all bits of UB. */
40639 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
40642 emit_move_insn (target, res);
40647 case IX86_BUILTIN_MASKMOVQ:
40648 case IX86_BUILTIN_MASKMOVDQU:
40649 icode = (fcode == IX86_BUILTIN_MASKMOVQ
40650 ? CODE_FOR_mmx_maskmovq
40651 : CODE_FOR_sse2_maskmovdqu);
40652 /* Note the arg order is different from the operand order. */
40653 arg1 = CALL_EXPR_ARG (exp, 0);
40654 arg2 = CALL_EXPR_ARG (exp, 1);
40655 arg0 = CALL_EXPR_ARG (exp, 2);
40656 op0 = expand_normal (arg0);
40657 op1 = expand_normal (arg1);
40658 op2 = expand_normal (arg2);
40659 mode0 = insn_data[icode].operand[0].mode;
40660 mode1 = insn_data[icode].operand[1].mode;
40661 mode2 = insn_data[icode].operand[2].mode;
40663 op0 = ix86_zero_extend_to_Pmode (op0);
40664 op0 = gen_rtx_MEM (mode1, op0);
40666 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40667 op0 = copy_to_mode_reg (mode0, op0);
40668 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40669 op1 = copy_to_mode_reg (mode1, op1);
40670 if (!insn_data[icode].operand[2].predicate (op2, mode2))
40671 op2 = copy_to_mode_reg (mode2, op2);
40672 pat = GEN_FCN (icode) (op0, op1, op2);
40678 case IX86_BUILTIN_LDMXCSR:
40679 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
40680 target = assign_386_stack_local (SImode, SLOT_TEMP);
40681 emit_move_insn (target, op0);
40682 emit_insn (gen_sse_ldmxcsr (target));
40685 case IX86_BUILTIN_STMXCSR:
40686 target = assign_386_stack_local (SImode, SLOT_TEMP);
40687 emit_insn (gen_sse_stmxcsr (target));
40688 return copy_to_mode_reg (SImode, target);
40690 case IX86_BUILTIN_CLFLUSH:
40691 arg0 = CALL_EXPR_ARG (exp, 0);
40692 op0 = expand_normal (arg0);
40693 icode = CODE_FOR_sse2_clflush;
40694 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40695 op0 = ix86_zero_extend_to_Pmode (op0);
40697 emit_insn (gen_sse2_clflush (op0));
40700 case IX86_BUILTIN_CLWB:
40701 arg0 = CALL_EXPR_ARG (exp, 0);
40702 op0 = expand_normal (arg0);
40703 icode = CODE_FOR_clwb;
40704 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40705 op0 = ix86_zero_extend_to_Pmode (op0);
40707 emit_insn (gen_clwb (op0));
40710 case IX86_BUILTIN_CLFLUSHOPT:
40711 arg0 = CALL_EXPR_ARG (exp, 0);
40712 op0 = expand_normal (arg0);
40713 icode = CODE_FOR_clflushopt;
40714 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40715 op0 = ix86_zero_extend_to_Pmode (op0);
40717 emit_insn (gen_clflushopt (op0));
40720 case IX86_BUILTIN_MONITOR:
40721 case IX86_BUILTIN_MONITORX:
40722 arg0 = CALL_EXPR_ARG (exp, 0);
40723 arg1 = CALL_EXPR_ARG (exp, 1);
40724 arg2 = CALL_EXPR_ARG (exp, 2);
40725 op0 = expand_normal (arg0);
40726 op1 = expand_normal (arg1);
40727 op2 = expand_normal (arg2);
40729 op0 = ix86_zero_extend_to_Pmode (op0);
40731 op1 = copy_to_mode_reg (SImode, op1);
40733 op2 = copy_to_mode_reg (SImode, op2);
40735 emit_insn (fcode == IX86_BUILTIN_MONITOR
40736 ? ix86_gen_monitor (op0, op1, op2)
40737 : ix86_gen_monitorx (op0, op1, op2));
40740 case IX86_BUILTIN_MWAIT:
40741 arg0 = CALL_EXPR_ARG (exp, 0);
40742 arg1 = CALL_EXPR_ARG (exp, 1);
40743 op0 = expand_normal (arg0);
40744 op1 = expand_normal (arg1);
40746 op0 = copy_to_mode_reg (SImode, op0);
40748 op1 = copy_to_mode_reg (SImode, op1);
40749 emit_insn (gen_sse3_mwait (op0, op1));
40752 case IX86_BUILTIN_MWAITX:
40753 arg0 = CALL_EXPR_ARG (exp, 0);
40754 arg1 = CALL_EXPR_ARG (exp, 1);
40755 arg2 = CALL_EXPR_ARG (exp, 2);
40756 op0 = expand_normal (arg0);
40757 op1 = expand_normal (arg1);
40758 op2 = expand_normal (arg2);
40760 op0 = copy_to_mode_reg (SImode, op0);
40762 op1 = copy_to_mode_reg (SImode, op1);
40764 op2 = copy_to_mode_reg (SImode, op2);
40765 emit_insn (gen_mwaitx (op0, op1, op2));
40768 case IX86_BUILTIN_CLZERO:
40769 arg0 = CALL_EXPR_ARG (exp, 0);
40770 op0 = expand_normal (arg0);
40772 op0 = ix86_zero_extend_to_Pmode (op0);
40773 emit_insn (ix86_gen_clzero (op0));
40776 case IX86_BUILTIN_VEC_INIT_V2SI:
40777 case IX86_BUILTIN_VEC_INIT_V4HI:
40778 case IX86_BUILTIN_VEC_INIT_V8QI:
40779 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
40781 case IX86_BUILTIN_VEC_EXT_V2DF:
40782 case IX86_BUILTIN_VEC_EXT_V2DI:
40783 case IX86_BUILTIN_VEC_EXT_V4SF:
40784 case IX86_BUILTIN_VEC_EXT_V4SI:
40785 case IX86_BUILTIN_VEC_EXT_V8HI:
40786 case IX86_BUILTIN_VEC_EXT_V2SI:
40787 case IX86_BUILTIN_VEC_EXT_V4HI:
40788 case IX86_BUILTIN_VEC_EXT_V16QI:
40789 return ix86_expand_vec_ext_builtin (exp, target);
40791 case IX86_BUILTIN_VEC_SET_V2DI:
40792 case IX86_BUILTIN_VEC_SET_V4SF:
40793 case IX86_BUILTIN_VEC_SET_V4SI:
40794 case IX86_BUILTIN_VEC_SET_V8HI:
40795 case IX86_BUILTIN_VEC_SET_V4HI:
40796 case IX86_BUILTIN_VEC_SET_V16QI:
40797 return ix86_expand_vec_set_builtin (exp);
40799 case IX86_BUILTIN_INFQ:
40800 case IX86_BUILTIN_HUGE_VALQ:
40802 REAL_VALUE_TYPE inf;
40806 tmp = const_double_from_real_value (inf, mode);
40808 tmp = validize_mem (force_const_mem (mode, tmp));
40811 target = gen_reg_rtx (mode);
40813 emit_move_insn (target, tmp);
40817 case IX86_BUILTIN_RDPMC:
40818 case IX86_BUILTIN_RDTSC:
40819 case IX86_BUILTIN_RDTSCP:
40821 op0 = gen_reg_rtx (DImode);
40822 op1 = gen_reg_rtx (DImode);
40824 if (fcode == IX86_BUILTIN_RDPMC)
40826 arg0 = CALL_EXPR_ARG (exp, 0);
40827 op2 = expand_normal (arg0);
40828 if (!register_operand (op2, SImode))
40829 op2 = copy_to_mode_reg (SImode, op2);
40831 insn = (TARGET_64BIT
40832 ? gen_rdpmc_rex64 (op0, op1, op2)
40833 : gen_rdpmc (op0, op2));
40836 else if (fcode == IX86_BUILTIN_RDTSC)
40838 insn = (TARGET_64BIT
40839 ? gen_rdtsc_rex64 (op0, op1)
40840 : gen_rdtsc (op0));
40845 op2 = gen_reg_rtx (SImode);
40847 insn = (TARGET_64BIT
40848 ? gen_rdtscp_rex64 (op0, op1, op2)
40849 : gen_rdtscp (op0, op2));
40852 arg0 = CALL_EXPR_ARG (exp, 0);
40853 op4 = expand_normal (arg0);
40854 if (!address_operand (op4, VOIDmode))
40856 op4 = convert_memory_address (Pmode, op4);
40857 op4 = copy_addr_to_reg (op4);
40859 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
40864 /* mode is VOIDmode if __builtin_rd* has been called
40866 if (mode == VOIDmode)
40868 target = gen_reg_rtx (mode);
40873 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
40874 op1, 1, OPTAB_DIRECT);
40875 op0 = expand_simple_binop (DImode, IOR, op0, op1,
40876 op0, 1, OPTAB_DIRECT);
40879 emit_move_insn (target, op0);
40882 case IX86_BUILTIN_FXSAVE:
40883 case IX86_BUILTIN_FXRSTOR:
40884 case IX86_BUILTIN_FXSAVE64:
40885 case IX86_BUILTIN_FXRSTOR64:
40886 case IX86_BUILTIN_FNSTENV:
40887 case IX86_BUILTIN_FLDENV:
40891 case IX86_BUILTIN_FXSAVE:
40892 icode = CODE_FOR_fxsave;
40894 case IX86_BUILTIN_FXRSTOR:
40895 icode = CODE_FOR_fxrstor;
40897 case IX86_BUILTIN_FXSAVE64:
40898 icode = CODE_FOR_fxsave64;
40900 case IX86_BUILTIN_FXRSTOR64:
40901 icode = CODE_FOR_fxrstor64;
40903 case IX86_BUILTIN_FNSTENV:
40904 icode = CODE_FOR_fnstenv;
40906 case IX86_BUILTIN_FLDENV:
40907 icode = CODE_FOR_fldenv;
40910 gcc_unreachable ();
40913 arg0 = CALL_EXPR_ARG (exp, 0);
40914 op0 = expand_normal (arg0);
40916 if (!address_operand (op0, VOIDmode))
40918 op0 = convert_memory_address (Pmode, op0);
40919 op0 = copy_addr_to_reg (op0);
40921 op0 = gen_rtx_MEM (mode0, op0);
40923 pat = GEN_FCN (icode) (op0);
40928 case IX86_BUILTIN_XSAVE:
40929 case IX86_BUILTIN_XRSTOR:
40930 case IX86_BUILTIN_XSAVE64:
40931 case IX86_BUILTIN_XRSTOR64:
40932 case IX86_BUILTIN_XSAVEOPT:
40933 case IX86_BUILTIN_XSAVEOPT64:
40934 case IX86_BUILTIN_XSAVES:
40935 case IX86_BUILTIN_XRSTORS:
40936 case IX86_BUILTIN_XSAVES64:
40937 case IX86_BUILTIN_XRSTORS64:
40938 case IX86_BUILTIN_XSAVEC:
40939 case IX86_BUILTIN_XSAVEC64:
40940 arg0 = CALL_EXPR_ARG (exp, 0);
40941 arg1 = CALL_EXPR_ARG (exp, 1);
40942 op0 = expand_normal (arg0);
40943 op1 = expand_normal (arg1);
40945 if (!address_operand (op0, VOIDmode))
40947 op0 = convert_memory_address (Pmode, op0);
40948 op0 = copy_addr_to_reg (op0);
40950 op0 = gen_rtx_MEM (BLKmode, op0);
40952 op1 = force_reg (DImode, op1);
40956 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
40957 NULL, 1, OPTAB_DIRECT);
40960 case IX86_BUILTIN_XSAVE:
40961 icode = CODE_FOR_xsave_rex64;
40963 case IX86_BUILTIN_XRSTOR:
40964 icode = CODE_FOR_xrstor_rex64;
40966 case IX86_BUILTIN_XSAVE64:
40967 icode = CODE_FOR_xsave64;
40969 case IX86_BUILTIN_XRSTOR64:
40970 icode = CODE_FOR_xrstor64;
40972 case IX86_BUILTIN_XSAVEOPT:
40973 icode = CODE_FOR_xsaveopt_rex64;
40975 case IX86_BUILTIN_XSAVEOPT64:
40976 icode = CODE_FOR_xsaveopt64;
40978 case IX86_BUILTIN_XSAVES:
40979 icode = CODE_FOR_xsaves_rex64;
40981 case IX86_BUILTIN_XRSTORS:
40982 icode = CODE_FOR_xrstors_rex64;
40984 case IX86_BUILTIN_XSAVES64:
40985 icode = CODE_FOR_xsaves64;
40987 case IX86_BUILTIN_XRSTORS64:
40988 icode = CODE_FOR_xrstors64;
40990 case IX86_BUILTIN_XSAVEC:
40991 icode = CODE_FOR_xsavec_rex64;
40993 case IX86_BUILTIN_XSAVEC64:
40994 icode = CODE_FOR_xsavec64;
40997 gcc_unreachable ();
41000 op2 = gen_lowpart (SImode, op2);
41001 op1 = gen_lowpart (SImode, op1);
41002 pat = GEN_FCN (icode) (op0, op1, op2);
41008 case IX86_BUILTIN_XSAVE:
41009 icode = CODE_FOR_xsave;
41011 case IX86_BUILTIN_XRSTOR:
41012 icode = CODE_FOR_xrstor;
41014 case IX86_BUILTIN_XSAVEOPT:
41015 icode = CODE_FOR_xsaveopt;
41017 case IX86_BUILTIN_XSAVES:
41018 icode = CODE_FOR_xsaves;
41020 case IX86_BUILTIN_XRSTORS:
41021 icode = CODE_FOR_xrstors;
41023 case IX86_BUILTIN_XSAVEC:
41024 icode = CODE_FOR_xsavec;
41027 gcc_unreachable ();
41029 pat = GEN_FCN (icode) (op0, op1);
41036 case IX86_BUILTIN_LLWPCB:
41037 arg0 = CALL_EXPR_ARG (exp, 0);
41038 op0 = expand_normal (arg0);
41039 icode = CODE_FOR_lwp_llwpcb;
41040 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
41041 op0 = ix86_zero_extend_to_Pmode (op0);
41042 emit_insn (gen_lwp_llwpcb (op0));
41045 case IX86_BUILTIN_SLWPCB:
41046 icode = CODE_FOR_lwp_slwpcb;
41048 || !insn_data[icode].operand[0].predicate (target, Pmode))
41049 target = gen_reg_rtx (Pmode);
41050 emit_insn (gen_lwp_slwpcb (target));
41053 case IX86_BUILTIN_BEXTRI32:
41054 case IX86_BUILTIN_BEXTRI64:
41055 arg0 = CALL_EXPR_ARG (exp, 0);
41056 arg1 = CALL_EXPR_ARG (exp, 1);
41057 op0 = expand_normal (arg0);
41058 op1 = expand_normal (arg1);
41059 icode = (fcode == IX86_BUILTIN_BEXTRI32
41060 ? CODE_FOR_tbm_bextri_si
41061 : CODE_FOR_tbm_bextri_di);
41062 if (!CONST_INT_P (op1))
41064 error ("last argument must be an immediate");
41069 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
41070 unsigned char lsb_index = INTVAL (op1) & 0xFF;
41071 op1 = GEN_INT (length);
41072 op2 = GEN_INT (lsb_index);
41073 pat = GEN_FCN (icode) (target, op0, op1, op2);
41079 case IX86_BUILTIN_RDRAND16_STEP:
41080 icode = CODE_FOR_rdrandhi_1;
41084 case IX86_BUILTIN_RDRAND32_STEP:
41085 icode = CODE_FOR_rdrandsi_1;
41089 case IX86_BUILTIN_RDRAND64_STEP:
41090 icode = CODE_FOR_rdranddi_1;
41094 op0 = gen_reg_rtx (mode0);
41095 emit_insn (GEN_FCN (icode) (op0));
41097 arg0 = CALL_EXPR_ARG (exp, 0);
41098 op1 = expand_normal (arg0);
41099 if (!address_operand (op1, VOIDmode))
41101 op1 = convert_memory_address (Pmode, op1);
41102 op1 = copy_addr_to_reg (op1);
41104 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
41106 op1 = gen_reg_rtx (SImode);
41107 emit_move_insn (op1, CONST1_RTX (SImode));
41109 /* Emit SImode conditional move. */
41110 if (mode0 == HImode)
41112 op2 = gen_reg_rtx (SImode);
41113 emit_insn (gen_zero_extendhisi2 (op2, op0));
41115 else if (mode0 == SImode)
41118 op2 = gen_rtx_SUBREG (SImode, op0, 0);
41121 || !register_operand (target, SImode))
41122 target = gen_reg_rtx (SImode);
41124 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
41126 emit_insn (gen_rtx_SET (target,
41127 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
41130 case IX86_BUILTIN_RDSEED16_STEP:
41131 icode = CODE_FOR_rdseedhi_1;
41135 case IX86_BUILTIN_RDSEED32_STEP:
41136 icode = CODE_FOR_rdseedsi_1;
41140 case IX86_BUILTIN_RDSEED64_STEP:
41141 icode = CODE_FOR_rdseeddi_1;
41145 op0 = gen_reg_rtx (mode0);
41146 emit_insn (GEN_FCN (icode) (op0));
41148 arg0 = CALL_EXPR_ARG (exp, 0);
41149 op1 = expand_normal (arg0);
41150 if (!address_operand (op1, VOIDmode))
41152 op1 = convert_memory_address (Pmode, op1);
41153 op1 = copy_addr_to_reg (op1);
41155 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
41157 op2 = gen_reg_rtx (QImode);
41159 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
41161 emit_insn (gen_rtx_SET (op2, pat));
41164 || !register_operand (target, SImode))
41165 target = gen_reg_rtx (SImode);
41167 emit_insn (gen_zero_extendqisi2 (target, op2));
41170 case IX86_BUILTIN_SBB32:
41171 icode = CODE_FOR_subborrowsi;
41175 case IX86_BUILTIN_SBB64:
41176 icode = CODE_FOR_subborrowdi;
41180 case IX86_BUILTIN_ADDCARRYX32:
41181 icode = CODE_FOR_addcarrysi;
41185 case IX86_BUILTIN_ADDCARRYX64:
41186 icode = CODE_FOR_addcarrydi;
41190 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
41191 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
41192 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
41193 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
41195 op1 = expand_normal (arg0);
41196 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
41198 op2 = expand_normal (arg1);
41199 if (!register_operand (op2, mode0))
41200 op2 = copy_to_mode_reg (mode0, op2);
41202 op3 = expand_normal (arg2);
41203 if (!register_operand (op3, mode0))
41204 op3 = copy_to_mode_reg (mode0, op3);
41206 op4 = expand_normal (arg3);
41207 if (!address_operand (op4, VOIDmode))
41209 op4 = convert_memory_address (Pmode, op4);
41210 op4 = copy_addr_to_reg (op4);
41213 /* Generate CF from input operand. */
41214 emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));
41216 /* Generate instruction that consumes CF. */
41217 op0 = gen_reg_rtx (mode0);
41219 op1 = gen_rtx_REG (CCCmode, FLAGS_REG);
41220 pat = gen_rtx_LTU (mode0, op1, const0_rtx);
41221 emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat));
41223 /* Return current CF value. */
41225 target = gen_reg_rtx (QImode);
41227 PUT_MODE (pat, QImode);
41228 emit_insn (gen_rtx_SET (target, pat));
41230 /* Store the result. */
41231 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
41235 case IX86_BUILTIN_READ_FLAGS:
41236 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
41239 || target == NULL_RTX
41240 || !nonimmediate_operand (target, word_mode)
41241 || GET_MODE (target) != word_mode)
41242 target = gen_reg_rtx (word_mode);
41244 emit_insn (gen_pop (target));
41247 case IX86_BUILTIN_WRITE_FLAGS:
41249 arg0 = CALL_EXPR_ARG (exp, 0);
41250 op0 = expand_normal (arg0);
41251 if (!general_no_elim_operand (op0, word_mode))
41252 op0 = copy_to_mode_reg (word_mode, op0);
41254 emit_insn (gen_push (op0));
41255 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
41258 case IX86_BUILTIN_KORTESTC16:
41259 icode = CODE_FOR_kortestchi;
41264 case IX86_BUILTIN_KORTESTZ16:
41265 icode = CODE_FOR_kortestzhi;
41270 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
41271 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
41272 op0 = expand_normal (arg0);
41273 op1 = expand_normal (arg1);
41275 op0 = copy_to_reg (op0);
41276 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
41277 op1 = copy_to_reg (op1);
41278 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
41280 target = gen_reg_rtx (QImode);
41281 emit_insn (gen_rtx_SET (target, const0_rtx));
41283 /* Emit kortest. */
41284 emit_insn (GEN_FCN (icode) (op0, op1));
41285 /* And use setcc to return result from flags. */
41286 ix86_expand_setcc (target, EQ,
41287 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
41290 case IX86_BUILTIN_GATHERSIV2DF:
41291 icode = CODE_FOR_avx2_gathersiv2df;
41293 case IX86_BUILTIN_GATHERSIV4DF:
41294 icode = CODE_FOR_avx2_gathersiv4df;
41296 case IX86_BUILTIN_GATHERDIV2DF:
41297 icode = CODE_FOR_avx2_gatherdiv2df;
41299 case IX86_BUILTIN_GATHERDIV4DF:
41300 icode = CODE_FOR_avx2_gatherdiv4df;
41302 case IX86_BUILTIN_GATHERSIV4SF:
41303 icode = CODE_FOR_avx2_gathersiv4sf;
41305 case IX86_BUILTIN_GATHERSIV8SF:
41306 icode = CODE_FOR_avx2_gathersiv8sf;
41308 case IX86_BUILTIN_GATHERDIV4SF:
41309 icode = CODE_FOR_avx2_gatherdiv4sf;
41311 case IX86_BUILTIN_GATHERDIV8SF:
41312 icode = CODE_FOR_avx2_gatherdiv8sf;
41314 case IX86_BUILTIN_GATHERSIV2DI:
41315 icode = CODE_FOR_avx2_gathersiv2di;
41317 case IX86_BUILTIN_GATHERSIV4DI:
41318 icode = CODE_FOR_avx2_gathersiv4di;
41320 case IX86_BUILTIN_GATHERDIV2DI:
41321 icode = CODE_FOR_avx2_gatherdiv2di;
41323 case IX86_BUILTIN_GATHERDIV4DI:
41324 icode = CODE_FOR_avx2_gatherdiv4di;
41326 case IX86_BUILTIN_GATHERSIV4SI:
41327 icode = CODE_FOR_avx2_gathersiv4si;
41329 case IX86_BUILTIN_GATHERSIV8SI:
41330 icode = CODE_FOR_avx2_gathersiv8si;
41332 case IX86_BUILTIN_GATHERDIV4SI:
41333 icode = CODE_FOR_avx2_gatherdiv4si;
41335 case IX86_BUILTIN_GATHERDIV8SI:
41336 icode = CODE_FOR_avx2_gatherdiv8si;
41338 case IX86_BUILTIN_GATHERALTSIV4DF:
41339 icode = CODE_FOR_avx2_gathersiv4df;
41341 case IX86_BUILTIN_GATHERALTDIV8SF:
41342 icode = CODE_FOR_avx2_gatherdiv8sf;
41344 case IX86_BUILTIN_GATHERALTSIV4DI:
41345 icode = CODE_FOR_avx2_gathersiv4di;
41347 case IX86_BUILTIN_GATHERALTDIV8SI:
41348 icode = CODE_FOR_avx2_gatherdiv8si;
41350 case IX86_BUILTIN_GATHER3SIV16SF:
41351 icode = CODE_FOR_avx512f_gathersiv16sf;
41353 case IX86_BUILTIN_GATHER3SIV8DF:
41354 icode = CODE_FOR_avx512f_gathersiv8df;
41356 case IX86_BUILTIN_GATHER3DIV16SF:
41357 icode = CODE_FOR_avx512f_gatherdiv16sf;
41359 case IX86_BUILTIN_GATHER3DIV8DF:
41360 icode = CODE_FOR_avx512f_gatherdiv8df;
41362 case IX86_BUILTIN_GATHER3SIV16SI:
41363 icode = CODE_FOR_avx512f_gathersiv16si;
41365 case IX86_BUILTIN_GATHER3SIV8DI:
41366 icode = CODE_FOR_avx512f_gathersiv8di;
41368 case IX86_BUILTIN_GATHER3DIV16SI:
41369 icode = CODE_FOR_avx512f_gatherdiv16si;
41371 case IX86_BUILTIN_GATHER3DIV8DI:
41372 icode = CODE_FOR_avx512f_gatherdiv8di;
41374 case IX86_BUILTIN_GATHER3ALTSIV8DF:
41375 icode = CODE_FOR_avx512f_gathersiv8df;
41377 case IX86_BUILTIN_GATHER3ALTDIV16SF:
41378 icode = CODE_FOR_avx512f_gatherdiv16sf;
41380 case IX86_BUILTIN_GATHER3ALTSIV8DI:
41381 icode = CODE_FOR_avx512f_gathersiv8di;
41383 case IX86_BUILTIN_GATHER3ALTDIV16SI:
41384 icode = CODE_FOR_avx512f_gatherdiv16si;
41386 case IX86_BUILTIN_GATHER3SIV2DF:
41387 icode = CODE_FOR_avx512vl_gathersiv2df;
41389 case IX86_BUILTIN_GATHER3SIV4DF:
41390 icode = CODE_FOR_avx512vl_gathersiv4df;
41392 case IX86_BUILTIN_GATHER3DIV2DF:
41393 icode = CODE_FOR_avx512vl_gatherdiv2df;
41395 case IX86_BUILTIN_GATHER3DIV4DF:
41396 icode = CODE_FOR_avx512vl_gatherdiv4df;
41398 case IX86_BUILTIN_GATHER3SIV4SF:
41399 icode = CODE_FOR_avx512vl_gathersiv4sf;
41401 case IX86_BUILTIN_GATHER3SIV8SF:
41402 icode = CODE_FOR_avx512vl_gathersiv8sf;
41404 case IX86_BUILTIN_GATHER3DIV4SF:
41405 icode = CODE_FOR_avx512vl_gatherdiv4sf;
41407 case IX86_BUILTIN_GATHER3DIV8SF:
41408 icode = CODE_FOR_avx512vl_gatherdiv8sf;
41410 case IX86_BUILTIN_GATHER3SIV2DI:
41411 icode = CODE_FOR_avx512vl_gathersiv2di;
41413 case IX86_BUILTIN_GATHER3SIV4DI:
41414 icode = CODE_FOR_avx512vl_gathersiv4di;
41416 case IX86_BUILTIN_GATHER3DIV2DI:
41417 icode = CODE_FOR_avx512vl_gatherdiv2di;
41419 case IX86_BUILTIN_GATHER3DIV4DI:
41420 icode = CODE_FOR_avx512vl_gatherdiv4di;
41422 case IX86_BUILTIN_GATHER3SIV4SI:
41423 icode = CODE_FOR_avx512vl_gathersiv4si;
41425 case IX86_BUILTIN_GATHER3SIV8SI:
41426 icode = CODE_FOR_avx512vl_gathersiv8si;
41428 case IX86_BUILTIN_GATHER3DIV4SI:
41429 icode = CODE_FOR_avx512vl_gatherdiv4si;
41431 case IX86_BUILTIN_GATHER3DIV8SI:
41432 icode = CODE_FOR_avx512vl_gatherdiv8si;
41434 case IX86_BUILTIN_GATHER3ALTSIV4DF:
41435 icode = CODE_FOR_avx512vl_gathersiv4df;
41437 case IX86_BUILTIN_GATHER3ALTDIV8SF:
41438 icode = CODE_FOR_avx512vl_gatherdiv8sf;
41440 case IX86_BUILTIN_GATHER3ALTSIV4DI:
41441 icode = CODE_FOR_avx512vl_gathersiv4di;
41443 case IX86_BUILTIN_GATHER3ALTDIV8SI:
41444 icode = CODE_FOR_avx512vl_gatherdiv8si;
41446 case IX86_BUILTIN_SCATTERSIV16SF:
41447 icode = CODE_FOR_avx512f_scattersiv16sf;
41449 case IX86_BUILTIN_SCATTERSIV8DF:
41450 icode = CODE_FOR_avx512f_scattersiv8df;
41452 case IX86_BUILTIN_SCATTERDIV16SF:
41453 icode = CODE_FOR_avx512f_scatterdiv16sf;
41455 case IX86_BUILTIN_SCATTERDIV8DF:
41456 icode = CODE_FOR_avx512f_scatterdiv8df;
41458 case IX86_BUILTIN_SCATTERSIV16SI:
41459 icode = CODE_FOR_avx512f_scattersiv16si;
41461 case IX86_BUILTIN_SCATTERSIV8DI:
41462 icode = CODE_FOR_avx512f_scattersiv8di;
41464 case IX86_BUILTIN_SCATTERDIV16SI:
41465 icode = CODE_FOR_avx512f_scatterdiv16si;
41467 case IX86_BUILTIN_SCATTERDIV8DI:
41468 icode = CODE_FOR_avx512f_scatterdiv8di;
41470 case IX86_BUILTIN_SCATTERSIV8SF:
41471 icode = CODE_FOR_avx512vl_scattersiv8sf;
41473 case IX86_BUILTIN_SCATTERSIV4SF:
41474 icode = CODE_FOR_avx512vl_scattersiv4sf;
41476 case IX86_BUILTIN_SCATTERSIV4DF:
41477 icode = CODE_FOR_avx512vl_scattersiv4df;
41479 case IX86_BUILTIN_SCATTERSIV2DF:
41480 icode = CODE_FOR_avx512vl_scattersiv2df;
41482 case IX86_BUILTIN_SCATTERDIV8SF:
41483 icode = CODE_FOR_avx512vl_scatterdiv8sf;
41485 case IX86_BUILTIN_SCATTERDIV4SF:
41486 icode = CODE_FOR_avx512vl_scatterdiv4sf;
41488 case IX86_BUILTIN_SCATTERDIV4DF:
41489 icode = CODE_FOR_avx512vl_scatterdiv4df;
41491 case IX86_BUILTIN_SCATTERDIV2DF:
41492 icode = CODE_FOR_avx512vl_scatterdiv2df;
41494 case IX86_BUILTIN_SCATTERSIV8SI:
41495 icode = CODE_FOR_avx512vl_scattersiv8si;
41497 case IX86_BUILTIN_SCATTERSIV4SI:
41498 icode = CODE_FOR_avx512vl_scattersiv4si;
41500 case IX86_BUILTIN_SCATTERSIV4DI:
41501 icode = CODE_FOR_avx512vl_scattersiv4di;
41503 case IX86_BUILTIN_SCATTERSIV2DI:
41504 icode = CODE_FOR_avx512vl_scattersiv2di;
41506 case IX86_BUILTIN_SCATTERDIV8SI:
41507 icode = CODE_FOR_avx512vl_scatterdiv8si;
41509 case IX86_BUILTIN_SCATTERDIV4SI:
41510 icode = CODE_FOR_avx512vl_scatterdiv4si;
41512 case IX86_BUILTIN_SCATTERDIV4DI:
41513 icode = CODE_FOR_avx512vl_scatterdiv4di;
41515 case IX86_BUILTIN_SCATTERDIV2DI:
41516 icode = CODE_FOR_avx512vl_scatterdiv2di;
41518 case IX86_BUILTIN_GATHERPFDPD:
41519 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
41520 goto vec_prefetch_gen;
41521 case IX86_BUILTIN_SCATTERALTSIV8DF:
41522 icode = CODE_FOR_avx512f_scattersiv8df;
41524 case IX86_BUILTIN_SCATTERALTDIV16SF:
41525 icode = CODE_FOR_avx512f_scatterdiv16sf;
41527 case IX86_BUILTIN_SCATTERALTSIV8DI:
41528 icode = CODE_FOR_avx512f_scattersiv8di;
41530 case IX86_BUILTIN_SCATTERALTDIV16SI:
41531 icode = CODE_FOR_avx512f_scatterdiv16si;
41533 case IX86_BUILTIN_GATHERPFDPS:
41534 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
41535 goto vec_prefetch_gen;
41536 case IX86_BUILTIN_GATHERPFQPD:
41537 icode = CODE_FOR_avx512pf_gatherpfv8didf;
41538 goto vec_prefetch_gen;
41539 case IX86_BUILTIN_GATHERPFQPS:
41540 icode = CODE_FOR_avx512pf_gatherpfv8disf;
41541 goto vec_prefetch_gen;
41542 case IX86_BUILTIN_SCATTERPFDPD:
41543 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
41544 goto vec_prefetch_gen;
41545 case IX86_BUILTIN_SCATTERPFDPS:
41546 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
41547 goto vec_prefetch_gen;
41548 case IX86_BUILTIN_SCATTERPFQPD:
41549 icode = CODE_FOR_avx512pf_scatterpfv8didf;
41550 goto vec_prefetch_gen;
41551 case IX86_BUILTIN_SCATTERPFQPS:
41552 icode = CODE_FOR_avx512pf_scatterpfv8disf;
41553 goto vec_prefetch_gen;
41557 rtx (*gen) (rtx, rtx);
41559 arg0 = CALL_EXPR_ARG (exp, 0);
41560 arg1 = CALL_EXPR_ARG (exp, 1);
41561 arg2 = CALL_EXPR_ARG (exp, 2);
41562 arg3 = CALL_EXPR_ARG (exp, 3);
41563 arg4 = CALL_EXPR_ARG (exp, 4);
41564 op0 = expand_normal (arg0);
41565 op1 = expand_normal (arg1);
41566 op2 = expand_normal (arg2);
41567 op3 = expand_normal (arg3);
41568 op4 = expand_normal (arg4);
41569 /* Note the arg order is different from the operand order. */
41570 mode0 = insn_data[icode].operand[1].mode;
41571 mode2 = insn_data[icode].operand[3].mode;
41572 mode3 = insn_data[icode].operand[4].mode;
41573 mode4 = insn_data[icode].operand[5].mode;
41575 if (target == NULL_RTX
41576 || GET_MODE (target) != insn_data[icode].operand[0].mode
41577 || !insn_data[icode].operand[0].predicate (target,
41578 GET_MODE (target)))
41579 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
41581 subtarget = target;
41585 case IX86_BUILTIN_GATHER3ALTSIV8DF:
41586 case IX86_BUILTIN_GATHER3ALTSIV8DI:
41587 half = gen_reg_rtx (V8SImode);
41588 if (!nonimmediate_operand (op2, V16SImode))
41589 op2 = copy_to_mode_reg (V16SImode, op2);
41590 emit_insn (gen_vec_extract_lo_v16si (half, op2));
41593 case IX86_BUILTIN_GATHER3ALTSIV4DF:
41594 case IX86_BUILTIN_GATHER3ALTSIV4DI:
41595 case IX86_BUILTIN_GATHERALTSIV4DF:
41596 case IX86_BUILTIN_GATHERALTSIV4DI:
41597 half = gen_reg_rtx (V4SImode);
41598 if (!nonimmediate_operand (op2, V8SImode))
41599 op2 = copy_to_mode_reg (V8SImode, op2);
41600 emit_insn (gen_vec_extract_lo_v8si (half, op2));
41603 case IX86_BUILTIN_GATHER3ALTDIV16SF:
41604 case IX86_BUILTIN_GATHER3ALTDIV16SI:
41605 half = gen_reg_rtx (mode0);
41606 if (mode0 == V8SFmode)
41607 gen = gen_vec_extract_lo_v16sf;
41609 gen = gen_vec_extract_lo_v16si;
41610 if (!nonimmediate_operand (op0, GET_MODE (op0)))
41611 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
41612 emit_insn (gen (half, op0));
41614 if (GET_MODE (op3) != VOIDmode)
41616 if (!nonimmediate_operand (op3, GET_MODE (op3)))
41617 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
41618 emit_insn (gen (half, op3));
41622 case IX86_BUILTIN_GATHER3ALTDIV8SF:
41623 case IX86_BUILTIN_GATHER3ALTDIV8SI:
41624 case IX86_BUILTIN_GATHERALTDIV8SF:
41625 case IX86_BUILTIN_GATHERALTDIV8SI:
41626 half = gen_reg_rtx (mode0);
41627 if (mode0 == V4SFmode)
41628 gen = gen_vec_extract_lo_v8sf;
41630 gen = gen_vec_extract_lo_v8si;
41631 if (!nonimmediate_operand (op0, GET_MODE (op0)))
41632 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
41633 emit_insn (gen (half, op0));
41635 if (GET_MODE (op3) != VOIDmode)
41637 if (!nonimmediate_operand (op3, GET_MODE (op3)))
41638 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
41639 emit_insn (gen (half, op3));
41647 /* Force memory operand only with base register here. But we
41648 don't want to do it on memory operand for other builtin
41650 op1 = ix86_zero_extend_to_Pmode (op1);
41652 if (!insn_data[icode].operand[1].predicate (op0, mode0))
41653 op0 = copy_to_mode_reg (mode0, op0);
41654 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
41655 op1 = copy_to_mode_reg (Pmode, op1);
41656 if (!insn_data[icode].operand[3].predicate (op2, mode2))
41657 op2 = copy_to_mode_reg (mode2, op2);
41659 op3 = fixup_modeless_constant (op3, mode3);
41661 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
41663 if (!insn_data[icode].operand[4].predicate (op3, mode3))
41664 op3 = copy_to_mode_reg (mode3, op3);
41668 op3 = copy_to_reg (op3);
41669 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
41671 if (!insn_data[icode].operand[5].predicate (op4, mode4))
41673 error ("the last argument must be scale 1, 2, 4, 8");
41677 /* Optimize. If mask is known to have all high bits set,
41678 replace op0 with pc_rtx to signal that the instruction
41679 overwrites the whole destination and doesn't use its
41680 previous contents. */
41683 if (TREE_CODE (arg3) == INTEGER_CST)
41685 if (integer_all_onesp (arg3))
41688 else if (TREE_CODE (arg3) == VECTOR_CST)
41690 unsigned int negative = 0;
41691 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
41693 tree cst = VECTOR_CST_ELT (arg3, i);
41694 if (TREE_CODE (cst) == INTEGER_CST
41695 && tree_int_cst_sign_bit (cst))
41697 else if (TREE_CODE (cst) == REAL_CST
41698 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
41701 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
41704 else if (TREE_CODE (arg3) == SSA_NAME
41705 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
41707 /* Recognize also when mask is like:
41708 __v2df src = _mm_setzero_pd ();
41709 __v2df mask = _mm_cmpeq_pd (src, src);
41711 __v8sf src = _mm256_setzero_ps ();
41712 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
41713 as that is a cheaper way to load all ones into
41714 a register than having to load a constant from
41716 gimple *def_stmt = SSA_NAME_DEF_STMT (arg3);
41717 if (is_gimple_call (def_stmt))
41719 tree fndecl = gimple_call_fndecl (def_stmt);
41721 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
41722 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
41724 case IX86_BUILTIN_CMPPD:
41725 case IX86_BUILTIN_CMPPS:
41726 case IX86_BUILTIN_CMPPD256:
41727 case IX86_BUILTIN_CMPPS256:
41728 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
41731 case IX86_BUILTIN_CMPEQPD:
41732 case IX86_BUILTIN_CMPEQPS:
41733 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
41734 && initializer_zerop (gimple_call_arg (def_stmt,
41745 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
41752 case IX86_BUILTIN_GATHER3DIV16SF:
41753 if (target == NULL_RTX)
41754 target = gen_reg_rtx (V8SFmode);
41755 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
41757 case IX86_BUILTIN_GATHER3DIV16SI:
41758 if (target == NULL_RTX)
41759 target = gen_reg_rtx (V8SImode);
41760 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
41762 case IX86_BUILTIN_GATHER3DIV8SF:
41763 case IX86_BUILTIN_GATHERDIV8SF:
41764 if (target == NULL_RTX)
41765 target = gen_reg_rtx (V4SFmode);
41766 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
41768 case IX86_BUILTIN_GATHER3DIV8SI:
41769 case IX86_BUILTIN_GATHERDIV8SI:
41770 if (target == NULL_RTX)
41771 target = gen_reg_rtx (V4SImode);
41772 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
41775 target = subtarget;
41781 arg0 = CALL_EXPR_ARG (exp, 0);
41782 arg1 = CALL_EXPR_ARG (exp, 1);
41783 arg2 = CALL_EXPR_ARG (exp, 2);
41784 arg3 = CALL_EXPR_ARG (exp, 3);
41785 arg4 = CALL_EXPR_ARG (exp, 4);
41786 op0 = expand_normal (arg0);
41787 op1 = expand_normal (arg1);
41788 op2 = expand_normal (arg2);
41789 op3 = expand_normal (arg3);
41790 op4 = expand_normal (arg4);
41791 mode1 = insn_data[icode].operand[1].mode;
41792 mode2 = insn_data[icode].operand[2].mode;
41793 mode3 = insn_data[icode].operand[3].mode;
41794 mode4 = insn_data[icode].operand[4].mode;
41796 /* Scatter instruction stores operand op3 to memory with
41797 indices from op2 and scale from op4 under writemask op1.
41798 If index operand op2 has more elements than source operand
41799 op3 one need to use only its low half. And vice versa. */
41802 case IX86_BUILTIN_SCATTERALTSIV8DF:
41803 case IX86_BUILTIN_SCATTERALTSIV8DI:
41804 half = gen_reg_rtx (V8SImode);
41805 if (!nonimmediate_operand (op2, V16SImode))
41806 op2 = copy_to_mode_reg (V16SImode, op2);
41807 emit_insn (gen_vec_extract_lo_v16si (half, op2));
41810 case IX86_BUILTIN_SCATTERALTDIV16SF:
41811 case IX86_BUILTIN_SCATTERALTDIV16SI:
41812 half = gen_reg_rtx (mode3);
41813 if (mode3 == V8SFmode)
41814 gen = gen_vec_extract_lo_v16sf;
41816 gen = gen_vec_extract_lo_v16si;
41817 if (!nonimmediate_operand (op3, GET_MODE (op3)))
41818 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
41819 emit_insn (gen (half, op3));
41826 /* Force memory operand only with base register here. But we
41827 don't want to do it on memory operand for other builtin
41829 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
41831 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
41832 op0 = copy_to_mode_reg (Pmode, op0);
41834 op1 = fixup_modeless_constant (op1, mode1);
41836 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
41838 if (!insn_data[icode].operand[1].predicate (op1, mode1))
41839 op1 = copy_to_mode_reg (mode1, op1);
41843 op1 = copy_to_reg (op1);
41844 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
41847 if (!insn_data[icode].operand[2].predicate (op2, mode2))
41848 op2 = copy_to_mode_reg (mode2, op2);
41850 if (!insn_data[icode].operand[3].predicate (op3, mode3))
41851 op3 = copy_to_mode_reg (mode3, op3);
41853 if (!insn_data[icode].operand[4].predicate (op4, mode4))
41855 error ("the last argument must be scale 1, 2, 4, 8");
41859 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
41867 arg0 = CALL_EXPR_ARG (exp, 0);
41868 arg1 = CALL_EXPR_ARG (exp, 1);
41869 arg2 = CALL_EXPR_ARG (exp, 2);
41870 arg3 = CALL_EXPR_ARG (exp, 3);
41871 arg4 = CALL_EXPR_ARG (exp, 4);
41872 op0 = expand_normal (arg0);
41873 op1 = expand_normal (arg1);
41874 op2 = expand_normal (arg2);
41875 op3 = expand_normal (arg3);
41876 op4 = expand_normal (arg4);
41877 mode0 = insn_data[icode].operand[0].mode;
41878 mode1 = insn_data[icode].operand[1].mode;
41879 mode3 = insn_data[icode].operand[3].mode;
41880 mode4 = insn_data[icode].operand[4].mode;
41882 op0 = fixup_modeless_constant (op0, mode0);
41884 if (GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
41886 if (!insn_data[icode].operand[0].predicate (op0, mode0))
41887 op0 = copy_to_mode_reg (mode0, op0);
41891 op0 = copy_to_reg (op0);
41892 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
41895 if (!insn_data[icode].operand[1].predicate (op1, mode1))
41896 op1 = copy_to_mode_reg (mode1, op1);
41898 /* Force memory operand only with base register here. But we
41899 don't want to do it on memory operand for other builtin
41901 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
41903 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
41904 op2 = copy_to_mode_reg (Pmode, op2);
41906 if (!insn_data[icode].operand[3].predicate (op3, mode3))
41908 error ("the forth argument must be scale 1, 2, 4, 8");
41912 if (!insn_data[icode].operand[4].predicate (op4, mode4))
41914 error ("incorrect hint operand");
41918 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
41926 case IX86_BUILTIN_XABORT:
41927 icode = CODE_FOR_xabort;
41928 arg0 = CALL_EXPR_ARG (exp, 0);
41929 op0 = expand_normal (arg0);
41930 mode0 = insn_data[icode].operand[0].mode;
41931 if (!insn_data[icode].operand[0].predicate (op0, mode0))
41933 error ("the xabort's argument must be an 8-bit immediate");
41936 emit_insn (gen_xabort (op0));
41943 for (i = 0, d = bdesc_special_args;
41944 i < ARRAY_SIZE (bdesc_special_args);
41946 if (d->code == fcode)
41947 return ix86_expand_special_args_builtin (d, exp, target);
41949 for (i = 0, d = bdesc_args;
41950 i < ARRAY_SIZE (bdesc_args);
41952 if (d->code == fcode)
41955 case IX86_BUILTIN_FABSQ:
41956 case IX86_BUILTIN_COPYSIGNQ:
41958 /* Emit a normal call if SSE isn't available. */
41959 return expand_call (exp, target, ignore);
41961 return ix86_expand_args_builtin (d, exp, target);
41964 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
41965 if (d->code == fcode)
41966 return ix86_expand_sse_comi (d, exp, target);
41968 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
41969 if (d->code == fcode)
41970 return ix86_expand_round_builtin (d, exp, target);
41972 for (i = 0, d = bdesc_pcmpestr;
41973 i < ARRAY_SIZE (bdesc_pcmpestr);
41975 if (d->code == fcode)
41976 return ix86_expand_sse_pcmpestr (d, exp, target);
41978 for (i = 0, d = bdesc_pcmpistr;
41979 i < ARRAY_SIZE (bdesc_pcmpistr);
41981 if (d->code == fcode)
41982 return ix86_expand_sse_pcmpistr (d, exp, target);
41984 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
41985 if (d->code == fcode)
41986 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
41987 (enum ix86_builtin_func_type)
41988 d->flag, d->comparison);
41990 gcc_unreachable ();
41993 /* This returns the target-specific builtin with code CODE if
41994 current_function_decl has visibility on this builtin, which is checked
41995 using isa flags. Returns NULL_TREE otherwise. */
41997 static tree ix86_get_builtin (enum ix86_builtins code)
/* Target options controlling which ISA extensions are enabled here.  */
41999 struct cl_target_option *opts;
42000 tree target_tree = NULL_TREE;
42002 /* Determine the isa flags of current_function_decl. */
42004 if (current_function_decl)
42005 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
/* No per-function target attribute: fall back to the global default
   target option node.  */
42007 if (target_tree == NULL)
42008 target_tree = target_option_default_node;
42010 opts = TREE_TARGET_OPTION (target_tree);
/* Return the builtin decl only when an ISA bit required by CODE is
   enabled in the selected target options; otherwise the caller gets
   NULL_TREE (see the function comment above).  */
42012 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
42013 return ix86_builtin_decl (code, true);
42018 /* Return function decl for target specific builtin
42019 for the given MPX builtin passed in FCODE. */
42021 ix86_builtin_mpx_function (unsigned fcode)
/* Translate each generic Pointer Bounds Checker (CHKP) builtin code
   FCODE into the matching IA-32 MPX builtin decl from ix86_builtins[].  */
42025 case BUILT_IN_CHKP_BNDMK:
42026 return ix86_builtins[IX86_BUILTIN_BNDMK];
42028 case BUILT_IN_CHKP_BNDSTX:
42029 return ix86_builtins[IX86_BUILTIN_BNDSTX];
42031 case BUILT_IN_CHKP_BNDLDX:
42032 return ix86_builtins[IX86_BUILTIN_BNDLDX];
42034 case BUILT_IN_CHKP_BNDCL:
42035 return ix86_builtins[IX86_BUILTIN_BNDCL];
42037 case BUILT_IN_CHKP_BNDCU:
42038 return ix86_builtins[IX86_BUILTIN_BNDCU];
42040 case BUILT_IN_CHKP_BNDRET:
42041 return ix86_builtins[IX86_BUILTIN_BNDRET];
42043 case BUILT_IN_CHKP_INTERSECT:
42044 return ix86_builtins[IX86_BUILTIN_BNDINT];
42046 case BUILT_IN_CHKP_NARROW:
42047 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
42049 case BUILT_IN_CHKP_SIZEOF:
42050 return ix86_builtins[IX86_BUILTIN_SIZEOF];
42052 case BUILT_IN_CHKP_EXTRACT_LOWER:
42053 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
42055 case BUILT_IN_CHKP_EXTRACT_UPPER:
42056 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
/* Any other CHKP builtin code has no MPX counterpart.  */
42062 gcc_unreachable ();
42065 /* Helper function for ix86_load_bounds and ix86_store_bounds.
42067 Return an address to be used to load/store bounds for pointer
42070 SLOT_NO is an integer constant holding number of a target
42071 dependent special slot to be used in case SLOT is not a memory.
42073 SPECIAL_BASE is a pointer to be used as a base of fake address
42074 to access special slots in Bounds Table. SPECIAL_BASE[-1],
42075 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
42078 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
42082 /* NULL slot means we pass bounds for pointer not passed to the
42083 function at all. Register slot means we pass pointer in a
42084 register. In both these cases bounds are passed via Bounds
42085 Table. Since we do not have actual pointer stored in memory,
42086 we have to use fake addresses to access Bounds Table. We
42087 start with (special_base - sizeof (void*)) and decrease this
42088 address by pointer size to get addresses for other slots. */
42089 if (!slot || REG_P (slot))
/* A fake slot requires a constant slot number to index from
   SPECIAL_BASE: slot K maps to SPECIAL_BASE[-(K + 1)].  */
42091 gcc_assert (CONST_INT_P (slot_no));
42092 addr = plus_constant (Pmode, special_base,
42093 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
42095 /* If pointer is passed in a memory then its address is used to
42096 access Bounds Table. */
42097 else if (MEM_P (slot))
42099 addr = XEXP (slot, 0);
/* Force the address into a register when it is not already a
   register operand, so it can serve as a base address.  */
42100 if (!register_operand (addr, Pmode))
42101 addr = copy_addr_to_reg (addr);
/* SLOT must be NULL, a REG, or a MEM -- nothing else is valid.  */
42104 gcc_unreachable ();
42109 /* Expand pass uses this hook to load bounds for function parameter
42110 PTR passed in SLOT in case its bounds are not passed in a register.
42112 If SLOT is a memory, then bounds are loaded as for regular pointer
42113 loaded from memory. PTR may be NULL in case SLOT is a memory.
42114 In such case value of PTR (if required) may be loaded from SLOT.
42116 If SLOT is NULL or a register then SLOT_NO is an integer constant
42117 holding number of the target dependent special slot which should be
42118 used to obtain bounds.
42120 Return loaded bounds. */
42123 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
/* Fresh pseudo in BNDmode to receive the loaded bounds.  */
42125 rtx reg = gen_reg_rtx (BNDmode);
42128 /* Get address to be used to access Bounds Table. Special slots start
42129 at the location of return address of the current function. */
42130 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
42132 /* Load pointer value from a memory if we don't have it. */
42135 gcc_assert (MEM_P (slot));
42136 ptr = copy_addr_to_reg (slot);
/* The ldx pattern needs PTR as a Pmode register; zero-extend if
   necessary.  */
42139 if (!register_operand (ptr, Pmode))
42140 ptr = ix86_zero_extend_to_Pmode (ptr);
/* Emit bndldx in the width matching BNDmode (64- or 32-bit MPX).  */
42142 emit_insn (BNDmode == BND64mode
42143 ? gen_bnd64_ldx (reg, addr, ptr)
42144 : gen_bnd32_ldx (reg, addr, ptr));
42149 /* Expand pass uses this hook to store BOUNDS for call argument PTR
42150 passed in SLOT in case BOUNDS are not passed in a register.
42152 If SLOT is a memory, then BOUNDS are stored as for regular pointer
42153 stored in memory. PTR may be NULL in case SLOT is a memory.
42154 In such case value of PTR (if required) may be loaded from SLOT.
42156 If SLOT is NULL or a register then SLOT_NO is an integer constant
42157 holding number of the target dependent special slot which should be
42158 used to store BOUNDS. */
42161 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
42165 /* Get address to be used to access Bounds Table. Special slots start
42166 at the location of return address of a called function. */
42167 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
42169 /* Load pointer value from a memory if we don't have it. */
42172 gcc_assert (MEM_P (slot));
42173 ptr = copy_addr_to_reg (slot);
/* The stx pattern needs PTR as a Pmode register; zero-extend if
   necessary.  */
42176 if (!register_operand (ptr, Pmode))
42177 ptr = ix86_zero_extend_to_Pmode (ptr);
/* BOUNDS must already be a bounds-mode value; force it into a
   BNDmode register for the store pattern.  */
42179 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
42180 if (!register_operand (bounds, BNDmode))
42181 bounds = copy_to_mode_reg (BNDmode, bounds);
/* Emit bndstx in the width matching BNDmode (64- or 32-bit MPX).  */
42183 emit_insn (BNDmode == BND64mode
42184 ? gen_bnd64_stx (addr, ptr, bounds)
42185 : gen_bnd32_stx (addr, ptr, bounds));
42188 /* Load and return bounds returned by function in SLOT. */
42191 ix86_load_returned_bounds (rtx slot)
/* SLOT must be a register; copy its bounds value into a fresh
   BNDmode pseudo and return that.  */
42195 gcc_assert (REG_P (slot));
42196 res = gen_reg_rtx (BNDmode);
42197 emit_move_insn (res, slot);
42202 /* Store BOUNDS returned by function into SLOT. */
42205 ix86_store_returned_bounds (rtx slot, rtx bounds)
/* SLOT must be a register; emit a plain move of BOUNDS into it.  */
42207 gcc_assert (REG_P (slot));
42208 emit_move_insn (slot, bounds);
42211 /* Returns a function decl for a vectorized version of the combined function
42212 with combined_fn code FN and the result vector type TYPE, or NULL_TREE
42213 if it is not available. */
42216 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
/* NOTE(review): the CASE_CFN_* labels selecting each group below are
   not visible in this listing; the per-group notes are inferred from
   the builtin names and should be confirmed against the full file.  */
42219 machine_mode in_mode, out_mode;
/* Only vector-to-vector mappings are handled.  */
42222 if (TREE_CODE (type_out) != VECTOR_TYPE
42223 || TREE_CODE (type_in) != VECTOR_TYPE)
/* Element mode and lane count of the result and argument vectors
   drive every selection below.  */
42226 out_mode = TYPE_MODE (TREE_TYPE (type_out));
42227 out_n = TYPE_VECTOR_SUBPARTS (type_out);
42228 in_mode = TYPE_MODE (TREE_TYPE (type_in));
42229 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* exp2f: only the 16-lane SF form (AVX-512ER EXP2PS) is provided.  */
42234 if (out_mode == SFmode && in_mode == SFmode)
42236 if (out_n == 16 && in_n == 16)
42237 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
/* Presumably the lfloor family: floor then pack to SImode lanes.  */
42244 /* The round insn does not trap on denormals. */
42245 if (flag_trapping_math || !TARGET_ROUND)
42248 if (out_mode == SImode && in_mode == DFmode)
42250 if (out_n == 4 && in_n == 2)
42251 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
42252 else if (out_n == 8 && in_n == 4)
42253 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
42254 else if (out_n == 16 && in_n == 8)
42255 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
42257 if (out_mode == SImode && in_mode == SFmode)
42259 if (out_n == 4 && in_n == 4)
42260 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
42261 else if (out_n == 8 && in_n == 8)
42262 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
/* Presumably the lceil family: ceil then pack to SImode lanes.  */
42269 /* The round insn does not trap on denormals. */
42270 if (flag_trapping_math || !TARGET_ROUND)
42273 if (out_mode == SImode && in_mode == DFmode)
42275 if (out_n == 4 && in_n == 2)
42276 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
42277 else if (out_n == 8 && in_n == 4)
42278 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
42279 else if (out_n == 16 && in_n == 8)
42280 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
42282 if (out_mode == SImode && in_mode == SFmode)
42284 if (out_n == 4 && in_n == 4)
42285 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
42286 else if (out_n == 8 && in_n == 8)
42287 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
/* Presumably float-to-int conversion (irint-style): pack doubles
   or use cvtps2dq for floats.  */
42294 if (out_mode == SImode && in_mode == DFmode)
42296 if (out_n == 4 && in_n == 2)
42297 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
42298 else if (out_n == 8 && in_n == 4)
42299 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
42301 if (out_mode == SImode && in_mode == SFmode)
42303 if (out_n == 4 && in_n == 4)
42304 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
42305 else if (out_n == 8 && in_n == 8)
42306 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
/* Presumably lround: round away from zero, then pack to SImode.  */
42313 /* The round insn does not trap on denormals. */
42314 if (flag_trapping_math || !TARGET_ROUND)
42317 if (out_mode == SImode && in_mode == DFmode)
42319 if (out_n == 4 && in_n == 2)
42320 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
42321 else if (out_n == 8 && in_n == 4)
42322 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
42323 else if (out_n == 16 && in_n == 8)
42324 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
42326 if (out_mode == SImode && in_mode == SFmode)
42328 if (out_n == 4 && in_n == 4)
42329 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
42330 else if (out_n == 8 && in_n == 8)
42331 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
/* Presumably floor.  */
42336 /* The round insn does not trap on denormals. */
42337 if (flag_trapping_math || !TARGET_ROUND)
42340 if (out_mode == DFmode && in_mode == DFmode)
42342 if (out_n == 2 && in_n == 2)
42343 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
42344 else if (out_n == 4 && in_n == 4)
42345 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
42347 if (out_mode == SFmode && in_mode == SFmode)
42349 if (out_n == 4 && in_n == 4)
42350 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
42351 else if (out_n == 8 && in_n == 8)
42352 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
/* Presumably ceil.  */
42357 /* The round insn does not trap on denormals. */
42358 if (flag_trapping_math || !TARGET_ROUND)
42361 if (out_mode == DFmode && in_mode == DFmode)
42363 if (out_n == 2 && in_n == 2)
42364 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
42365 else if (out_n == 4 && in_n == 4)
42366 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
42368 if (out_mode == SFmode && in_mode == SFmode)
42370 if (out_n == 4 && in_n == 4)
42371 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
42372 else if (out_n == 8 && in_n == 8)
42373 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
/* Presumably trunc.  */
42378 /* The round insn does not trap on denormals. */
42379 if (flag_trapping_math || !TARGET_ROUND)
42382 if (out_mode == DFmode && in_mode == DFmode)
42384 if (out_n == 2 && in_n == 2)
42385 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
42386 else if (out_n == 4 && in_n == 4)
42387 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
42389 if (out_mode == SFmode && in_mode == SFmode)
42391 if (out_n == 4 && in_n == 4)
42392 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
42393 else if (out_n == 8 && in_n == 8)
42394 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
/* Presumably rint.  */
42399 /* The round insn does not trap on denormals. */
42400 if (flag_trapping_math || !TARGET_ROUND)
42403 if (out_mode == DFmode && in_mode == DFmode)
42405 if (out_n == 2 && in_n == 2)
42406 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
42407 else if (out_n == 4 && in_n == 4)
42408 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
42410 if (out_mode == SFmode && in_mode == SFmode)
42412 if (out_n == 4 && in_n == 4)
42413 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
42414 else if (out_n == 8 && in_n == 8)
42415 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
/* Presumably fma: fused multiply-add builtins.  */
42420 if (out_mode == DFmode && in_mode == DFmode)
42422 if (out_n == 2 && in_n == 2)
42423 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
42424 if (out_n == 4 && in_n == 4)
42425 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
42427 if (out_mode == SFmode && in_mode == SFmode)
42429 if (out_n == 4 && in_n == 4)
42430 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
42431 if (out_n == 8 && in_n == 8)
42432 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
42440 /* Dispatch to a handler for a vectorization library. */
/* No direct match: let an external vector-math library (e.g. SVML,
   see ix86_veclibabi_svml below) provide the routine, if one is
   configured.  */
42441 if (ix86_veclib_handler)
42442 return ix86_veclib_handler (combined_fn (fn), type_out, type_in);
42447 /* Handler for an SVML-style interface to
42448 a library with vectorized intrinsics. */
/* Build an extern decl for the SVML routine implementing scalar math
   builtin FN on vectors TYPE_IN -> TYPE_OUT.  Names are composed as
   "vmls<Name>4" (4xSF) / "vmld<Name>2" (2xDF) from the scalar builtin's
   name.  NOTE(review): this listing is sampled -- rows between the
   numbered lines (early-return guards, the uppercase loop body, the
   final return) are elided, so control flow here is incomplete.  */
42451 ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
42454 tree fntype, new_fndecl, args;
42457 machine_mode el_mode, in_mode;
/* SVML results are not strictly IEEE, so only under -funsafe-math.  */
42460 /* The SVML is suitable for unsafe math only. */
42461 if (!flag_unsafe_math_optimizations)
42464 el_mode = TYPE_MODE (TREE_TYPE (type_out));
42465 n = TYPE_VECTOR_SUBPARTS (type_out);
42466 in_mode = TYPE_MODE (TREE_TYPE (type_in));
42467 in_n = TYPE_VECTOR_SUBPARTS (type_in);
42468 if (el_mode != in_mode
/* Only 2-element DFmode and 4-element SFmode vectors are handled.  */
42492 if ((el_mode != DFmode || n != 2)
42493 && (el_mode != SFmode || n != 4))
42501 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
42502 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
/* log/logf get the special "Ln" spelling; other names are derived from
   bname+10 -- presumably skipping a "__builtin_" prefix, TODO confirm.  */
42504 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
42505 strcpy (name, "vmlsLn4");
42506 else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
42507 strcpy (name, "vmldLn2");
42510 sprintf (name, "vmls%s", bname+10);
42511 name[strlen (name)-1] = '4';
42514 sprintf (name, "vmld%s2", bname+10);
42516 /* Convert to uppercase. */
/* Choose a one- or two-argument function type from the scalar decl's
   argument list (condition between these lines is elided).  */
42520 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
42524 fntype = build_function_type_list (type_out, type_in, NULL);
42526 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
42528 /* Build a function declaration for the vectorized function. */
42529 new_fndecl = build_decl (BUILTINS_LOCATION,
42530 FUNCTION_DECL, get_identifier (name), fntype);
/* External, no-VOPs, readonly: a pure library math routine.  */
42531 TREE_PUBLIC (new_fndecl) = 1;
42532 DECL_EXTERNAL (new_fndecl) = 1;
42533 DECL_IS_NOVOPS (new_fndecl) = 1;
42534 TREE_READONLY (new_fndecl) = 1;
42539 /* Handler for an ACML-style interface to
42540 a library with vectorized intrinsics. */
/* Build an extern decl for the ACML routine implementing scalar math
   builtin FN on vectors TYPE_IN -> TYPE_OUT.  The "__vr.._" template
   has its two placeholder characters patched on lines elided from this
   listing (between rows 42575 and 42593).  NOTE(review): sampled
   listing; guards and the final return are not visible here.  */
42543 ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
42545 char name[20] = "__vr.._";
42546 tree fntype, new_fndecl, args;
42549 machine_mode el_mode, in_mode;
42552 /* The ACML is 64bits only and suitable for unsafe math only as
42553 it does not correctly support parts of IEEE with the required
42554 precision such as denormals. */
42556 || !flag_unsafe_math_optimizations)
42559 el_mode = TYPE_MODE (TREE_TYPE (type_out));
42560 n = TYPE_VECTOR_SUBPARTS (type_out);
42561 in_mode = TYPE_MODE (TREE_TYPE (type_in));
42562 in_n = TYPE_VECTOR_SUBPARTS (type_in);
42563 if (el_mode != in_mode
/* Only 2xDF and 4xSF shapes are accepted.  */
42575 if (el_mode == DFmode && n == 2)
42580 else if (el_mode == SFmode && n == 4)
42593 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
42594 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
/* Append the scalar name after the 7-char "__vrXY_" prefix; bname+10
   presumably skips "__builtin_" -- TODO confirm.  */
42595 sprintf (name + 7, "%s", bname+10);
42598 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
42602 fntype = build_function_type_list (type_out, type_in, NULL);
42604 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
42606 /* Build a function declaration for the vectorized function. */
42607 new_fndecl = build_decl (BUILTINS_LOCATION,
42608 FUNCTION_DECL, get_identifier (name), fntype);
42609 TREE_PUBLIC (new_fndecl) = 1;
42610 DECL_EXTERNAL (new_fndecl) = 1;
42611 DECL_IS_NOVOPS (new_fndecl) = 1;
42612 TREE_READONLY (new_fndecl) = 1;
42617 /* Returns a decl of a function that implements gather load with
42618 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
42619 Return NULL_TREE if it is not available. */
/* Select the gather builtin for memory vector type MEM_VECTYPE indexed
   by INDEX_TYPE with the given SCALE, or (on elided paths) NULL_TREE.
   NOTE(review): sampled listing -- the NULL_TREE early returns between
   the guards below are elided.  */
42622 ix86_vectorize_builtin_gather (const_tree mem_vectype,
42623 const_tree index_type, int scale)
42626 enum ix86_builtins code;
/* Index must be a 32- or 64-bit integer or pointer type.  */
42631 if ((TREE_CODE (index_type) != INTEGER_TYPE
42632 && !POINTER_TYPE_P (index_type))
42633 || (TYPE_MODE (index_type) != SImode
42634 && TYPE_MODE (index_type) != DImode))
42637 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
42640 /* v*gather* insn sign extends index to pointer mode. */
/* A narrower unsigned index would be zero- rather than sign-extended,
   so it is rejected (rejection line elided).  */
42641 if (TYPE_PRECISION (index_type) < POINTER_SIZE
42642 && TYPE_UNSIGNED (index_type))
/* Scale must be a power of two (range check partially elided).  */
42647 || (scale & (scale - 1)) != 0)
/* SI selects the 32-bit-index builtin variant in each mode case.  */
42650 si = TYPE_MODE (index_type) == SImode;
42651 switch (TYPE_MODE (mem_vectype))
42654 if (TARGET_AVX512VL)
42655 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
42657 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
42660 if (TARGET_AVX512VL)
42661 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
42663 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
42666 if (TARGET_AVX512VL)
42667 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
42669 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
42672 if (TARGET_AVX512VL)
42673 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
42675 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
42678 if (TARGET_AVX512VL)
42679 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
42681 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
42684 if (TARGET_AVX512VL)
42685 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
42687 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
42690 if (TARGET_AVX512VL)
42691 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
42693 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
42696 if (TARGET_AVX512VL)
42697 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
42699 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
/* 512-bit cases need full AVX-512F; else-branches elided.  */
42702 if (TARGET_AVX512F)
42703 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
42708 if (TARGET_AVX512F)
42709 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
42714 if (TARGET_AVX512F)
42715 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
42720 if (TARGET_AVX512F)
42721 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
42729 return ix86_get_builtin (code);
42732 /* Returns a decl of a function that implements scatter store with
42733 register type VECTYPE and index type INDEX_TYPE and SCALE.
42734 Return NULL_TREE if it is not available. */
/* Select the scatter builtin for vector type VECTYPE indexed by
   INDEX_TYPE with SCALE; mirrors the gather hook above but only for
   AVX-512F 512-bit shapes.  NOTE(review): sampled listing -- the
   NULL_TREE early returns are elided.  */
42737 ix86_vectorize_builtin_scatter (const_tree vectype,
42738 const_tree index_type, int scale)
42741 enum ix86_builtins code;
42743 if (!TARGET_AVX512F)
42746 if ((TREE_CODE (index_type) != INTEGER_TYPE
42747 && !POINTER_TYPE_P (index_type))
42748 || (TYPE_MODE (index_type) != SImode
42749 && TYPE_MODE (index_type) != DImode))
42752 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
42755 /* v*scatter* insn sign extends index to pointer mode. */
42756 if (TYPE_PRECISION (index_type) < POINTER_SIZE
42757 && TYPE_UNSIGNED (index_type))
42760 /* Scale can be 1, 2, 4 or 8. */
42763 || (scale & (scale - 1)) != 0)
42766 si = TYPE_MODE (index_type) == SImode;
42767 switch (TYPE_MODE (vectype))
42770 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
42773 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
42776 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
42779 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
/* NOTE(review): indexes ix86_builtins[] directly, while the gather hook
   goes through ix86_get_builtin (code) -- verify this asymmetry is
   intentional (gather may lazily instantiate decls).  */
42785 return ix86_builtins[code];
/* NOTE(review): the defining line of this predicate is elided from the
   listing; from the visible body it returns true only when SSE math is
   on and the finite-only / non-trapping / unsafe-math flags permit the
   reciprocal-sqrt approximation.  */
42788 /* Return true if it is safe to use the rsqrt optabs to optimize
42794 return (TARGET_SSE_MATH
42795 && flag_finite_math_only
42796 && !flag_trapping_math
42797 && flag_unsafe_math_optimizations);
42800 /* Returns a code for a target-specific builtin that implements
42801 reciprocal of the function, or NULL_TREE if not available. */
/* Map a sqrt builtin to its reciprocal-approximation counterpart
   (SQRTPS_NR -> RSQRTPS_NR, 128- and 256-bit).  Default case and the
   closing return are elided from this listing.  */
42804 ix86_builtin_reciprocal (tree fndecl)
42806 switch (DECL_FUNCTION_CODE (fndecl))
42808 /* Vectorized version of sqrt to rsqrt conversion. */
42809 case IX86_BUILTIN_SQRTPS_NR:
42810 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
42812 case IX86_BUILTIN_SQRTPS_NR256:
42813 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
42820 /* Helper for avx_vpermilps256_operand et al. This is also used by
42821 the expansion functions to turn the parallel back into a mask.
42822 The return value is 0 for no match and the imm8+1 for a match. */
/* Validate a PARALLEL as a vpermilps/vpermilpd permutation for MODE and
   reconstruct the immediate; returns imm8+1 on match, 0 (on elided
   paths) otherwise.  NOTE(review): sampled listing -- the switch labels
   on mode and several failure returns are elided.  */
42825 avx_vpermilp_parallel (rtx par, machine_mode mode)
42827 unsigned i, nelt = GET_MODE_NUNITS (mode);
42829 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
42831 if (XVECLEN (par, 0) != (int) nelt)
42834 /* Validate that all of the elements are constants, and not totally
42835 out of range. Copy the data into an integral array to make the
42836 subsequent checks easier. */
42837 for (i = 0; i < nelt; ++i)
42839 rtx er = XVECEXP (par, 0, i);
42840 unsigned HOST_WIDE_INT ei;
42842 if (!CONST_INT_P (er))
42853 /* In the 512-bit DFmode case, we can only move elements within
42854 a 128-bit lane. First fill the second part of the mask,
42856 for (i = 4; i < 6; ++i)
42858 if (ipar[i] < 4 || ipar[i] >= 6)
42860 mask |= (ipar[i] - 4) << i;
42862 for (i = 6; i < 8; ++i)
/* Range check for this lane (ipar[i] in [6,8)) is elided above.  */
42866 mask |= (ipar[i] - 6) << i;
42871 /* In the 256-bit DFmode case, we can only move elements within
42873 for (i = 0; i < 2; ++i)
42877 mask |= ipar[i] << i;
42879 for (i = 2; i < 4; ++i)
42883 mask |= (ipar[i] - 2) << i;
42888 /* In 512 bit SFmode case, permutation in the upper 256 bits
42889 must mirror the permutation in the lower 256-bits. */
42890 for (i = 0; i < 8; ++i)
42891 if (ipar[i] + 8 != ipar[i + 8])
42896 /* In 256 bit SFmode case, we have full freedom of
42897 movement within the low 128-bit lane, but the high 128-bit
42898 lane must mirror the exact same pattern. */
42899 for (i = 0; i < 4; ++i)
42900 if (ipar[i] + 4 != ipar[i + 4])
42907 /* In the 128-bit case, we've full freedom in the placement of
42908 the elements from the source operand. */
/* Pack each index into its 2-bit (V4SF) or 1-bit (V2DF) mask field.  */
42909 for (i = 0; i < nelt; ++i)
42910 mask |= ipar[i] << (i * (nelt / 2));
42914 gcc_unreachable ();
42917 /* Make sure success has a non-zero value by adding one. */
42921 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
42922 the expansion functions to turn the parallel back into a mask.
42923 The return value is 0 for no match and the imm8+1 for a match. */
/* Validate a PARALLEL as a vperm2f128 lane permutation for MODE and
   reconstruct the immediate; returns imm8+1 on match, 0 (on elided
   paths) otherwise.  */
42926 avx_vperm2f128_parallel (rtx par, machine_mode mode)
42928 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
42930 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
42932 if (XVECLEN (par, 0) != (int) nelt)
42935 /* Validate that all of the elements are constants, and not totally
42936 out of range. Copy the data into an integral array to make the
42937 subsequent checks easier. */
42938 for (i = 0; i < nelt; ++i)
42940 rtx er = XVECEXP (par, 0, i);
42941 unsigned HOST_WIDE_INT ei;
42943 if (!CONST_INT_P (er))
/* Indices select from a 2*nelt concatenation of the two sources.  */
42946 if (ei >= 2 * nelt)
42951 /* Validate that the halves of the permute are halves. */
42952 for (i = 0; i < nelt2 - 1; ++i)
42953 if (ipar[i] + 1 != ipar[i + 1])
42955 for (i = nelt2; i < nelt - 1; ++i)
42956 if (ipar[i] + 1 != ipar[i + 1])
42959 /* Reconstruct the mask. */
/* Each selected half contributes a nibble of the immediate.  */
42960 for (i = 0; i < 2; ++i)
42962 unsigned e = ipar[i * nelt2];
42966 mask |= e << (i * 4);
42969 /* Make sure success has a non-zero value by adding one. */
42973 /* Return a register priority for hard reg REGNO. */
/* IRA register priority for HARD_REGNO.  NOTE(review): the actual
   numeric return values are on lines elided from this listing; only
   the classification conditions are visible.  */
42975 ix86_register_priority (int hard_regno)
42977 /* ebp and r13 as the base always wants a displacement, r12 as the
42978 base always wants an index. So discourage their usage in an
42980 if (hard_regno == R12_REG || hard_regno == R13_REG)
42982 if (hard_regno == BP_REG)
42984 /* New x86-64 int registers result in bigger code size. Discourage
/* REX-prefixed integer and SSE registers cost an extra prefix byte.  */
42986 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
42988 /* New x86-64 SSE registers result in bigger code size. Discourage
42990 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
42992 /* Usage of AX register results in smaller code. Prefer it. */
42993 if (hard_regno == AX_REG)
42998 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
43000 Put float CONST_DOUBLE in the constant pool instead of fp regs.
43001 QImode must go into class Q_REGS.
43002 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
43003 movdf to do mem-to-mem moves through integer regs. */
/* TARGET_PREFERRED_RELOAD_CLASS: narrow REGCLASS for loading X.
   NOTE(review): sampled listing -- several return statements between
   the visible conditions are elided.  */
43006 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
43008 machine_mode mode = GET_MODE (x);
43010 /* We're only allowed to return a subclass of CLASS. Many of the
43011 following checks fail for NO_REGS, so eliminate that early. */
43012 if (regclass == NO_REGS)
43015 /* All classes can load zeros. */
43016 if (x == CONST0_RTX (mode))
43019 /* Force constants into memory if we are loading a (nonzero) constant into
43020 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
43021 instructions to load from a constant. */
/* The CONSTANT_P (x) half of this condition is elided above.  */
43023 && (MAYBE_MMX_CLASS_P (regclass)
43024 || MAYBE_SSE_CLASS_P (regclass)
43025 || MAYBE_MASK_CLASS_P (regclass)))
43028 /* Prefer SSE regs only, if we can use them for math. */
43029 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
43030 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
43032 /* Floating-point constants need more complex checks. */
43033 if (CONST_DOUBLE_P (x))
43035 /* General regs can load everything. */
43036 if (reg_class_subset_p (regclass, GENERAL_REGS))
43039 /* Floats can load 0 and 1 plus some others. Note that we eliminated
43040 zero above. We only want to wind up preferring 80387 registers if
43041 we plan on doing computation with them. */
43043 && standard_80387_constant_p (x) > 0)
43045 /* Limit class to non-sse. */
43046 if (regclass == FLOAT_SSE_REGS)
43048 if (regclass == FP_TOP_SSE_REGS)
43050 if (regclass == FP_SECOND_SSE_REGS)
43051 return FP_SECOND_REG;
43052 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
43059 /* Generally when we see PLUS here, it's the function invariant
43060 (plus soft-fp const_int). Which can only be computed into general
43062 if (GET_CODE (x) == PLUS)
43063 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
43065 /* QImode constants are easy to load, but non-constant QImode data
43066 must go into Q_REGS. */
43067 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
43069 if (reg_class_subset_p (regclass, Q_REGS))
43071 if (reg_class_subset_p (Q_REGS, regclass))
43079 /* Discourage putting floating-point values in SSE registers unless
43080 SSE math is being used, and likewise for the 387 registers. */
/* TARGET_PREFERRED_OUTPUT_RELOAD_CLASS: keep FP values in the register
   bank the math is actually done in (SSE vs x87).  */
43082 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
43084 machine_mode mode = GET_MODE (x);
43086 /* Restrict the output reload class to the register bank that we are doing
43087 math on. If we would like not to return a subset of CLASS, reject this
43088 alternative: if reload cannot do this, it will still use its choice. */
/* NOTE(review): redundant -- mode was already initialized from
   GET_MODE (x) at declaration above.  */
43089 mode = GET_MODE (x);
43090 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
43091 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
43093 if (X87_FLOAT_MODE_P (mode))
43095 if (regclass == FP_TOP_SSE_REGS)
43097 else if (regclass == FP_SECOND_SSE_REGS)
43098 return FP_SECOND_REG;
43100 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* TARGET_SECONDARY_RELOAD: pick insn/intermediate class for reloads
   that cannot be done directly.  NOTE(review): sampled listing --
   enclosing conditions and several returns are elided.  */
43107 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
43108 machine_mode mode, secondary_reload_info *sri)
43110 /* Double-word spills from general registers to non-offsettable memory
43111 references (zero-extended addresses) require special handling. */
43114 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
43115 && INTEGER_CLASS_P (rclass)
43116 && !offsettable_memref_p (x))
/* Use the special reload_noff patterns for this case.  */
43119 ? CODE_FOR_reload_noff_load
43120 : CODE_FOR_reload_noff_store);
43121 /* Add the cost of moving address to a temporary. */
43122 sri->extra_cost = 1;
43127 /* QImode spills from non-QI registers require
43128 intermediate register on 32bit targets. */
43130 && (MAYBE_MASK_CLASS_P (rclass)
43131 || (!TARGET_64BIT && !in_p
43132 && INTEGER_CLASS_P (rclass)
43133 && MAYBE_NON_Q_CLASS_P (rclass))))
/* Resolve pseudos/subregs to a hard register number when possible.  */
43142 if (regno >= FIRST_PSEUDO_REGISTER || SUBREG_P (x))
43143 regno = true_regnum (x);
43145 /* Return Q_REGS if the operand is in memory. */
43150 /* This condition handles corner case where an expression involving
43151 pointers gets vectorized. We're trying to use the address of a
43152 stack slot as a vector initializer.
43154 (set (reg:V2DI 74 [ vect_cst_.2 ])
43155 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
43157 Eventually frame gets turned into sp+offset like this:
43159 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
43160 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
43161 (const_int 392 [0x188]))))
43163 That later gets turned into:
43165 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
43166 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
43167 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
43169 We'll have the following reload recorded:
43171 Reload 0: reload_in (DI) =
43172 (plus:DI (reg/f:DI 7 sp)
43173 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
43174 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
43175 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
43176 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
43177 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
43178 reload_reg_rtx: (reg:V2DI 22 xmm1)
43180 Which isn't going to work since SSE instructions can't handle scalar
43181 additions. Returning GENERAL_REGS forces the addition into integer
43182 register and reload can handle subsequent reloads without problems. */
43184 if (in_p && GET_CODE (x) == PLUS
43185 && SSE_CLASS_P (rclass)
43186 && SCALAR_INT_MODE_P (mode))
43187 return GENERAL_REGS;
43192 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
/* TARGET_CLASS_LIKELY_SPILLED_P.  NOTE(review): only a few case labels
   of the switch survive in this sampled listing; the switch head,
   remaining cases and returns are elided.  */
43195 ix86_class_likely_spilled_p (reg_class_t rclass)
43206 case SSE_FIRST_REG:
43208 case FP_SECOND_REG:
43219 /* If we are copying between general and FP registers, we need a memory
43220 location. The same is true for SSE and MMX registers.
43222 To optimize register_move_cost performance, allow inline variant.
43224 The macro can't work reliably when one of the CLASSES is class containing
43225 registers from multiple units (SSE, MMX, integer). We avoid this by never
43226 combining those units in single alternative in the machine description.
43227 Ensure that this constraint holds to avoid unexpected surprises.
43229 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
43230 enforce these sanity checks. */
/* Core of ix86_secondary_memory_needed; kept inline because it is also
   called from the register-move cost path.  NOTE(review): sampled
   listing -- several return statements are elided.  */
43233 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
43234 machine_mode mode, int strict)
43236 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
/* Sanity check: classes mixing units (SSE/MMX/float with others) must
   not reach here -- see the block comment above this function.  */
43238 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
43239 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
43240 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
43241 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
43242 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
43243 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
43245 gcc_assert (!strict || lra_in_progress);
43249 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
43252 /* Between mask and general, we have moves no larger than word size. */
43253 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
43254 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
43257 /* ??? This is a lie. We do have moves between mmx/general, and for
43258 mmx/sse2. But by saying we need secondary memory we discourage the
43259 register allocator from using the mmx registers unless needed. */
43260 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
43263 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
43265 /* SSE1 doesn't have any direct moves from other classes. */
43269 /* If the target says that inter-unit moves are more expensive
43270 than moving through memory, then don't generate them. */
43271 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
43272 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
43275 /* Between SSE and general, we have moves no larger than word size. */
43276 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper around inline_secondary_memory_needed, used as
   the SECONDARY_MEMORY_NEEDED entry point.  */
43284 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
43285 machine_mode mode, int strict)
43287 return inline_secondary_memory_needed (class1, class2, mode, strict);
43290 /* Implement the TARGET_CLASS_MAX_NREGS hook.
43292 On the 80386, this is the size of MODE in words,
43293 except in the FP regs, where a single reg is always enough. */
/* TARGET_CLASS_MAX_NREGS: registers needed to hold MODE in RCLASS.
   XF/XC in integer regs take 2/4 words on 64-bit, 3/6 on 32-bit;
   the non-integer and FP-class returns are on elided lines.  */
43295 static unsigned char
43296 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
43298 if (MAYBE_INTEGER_CLASS_P (rclass))
43300 if (mode == XFmode)
43301 return (TARGET_64BIT ? 2 : 3);
43302 else if (mode == XCmode)
43303 return (TARGET_64BIT ? 4 : 6);
43305 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
43309 if (COMPLEX_MODE_P (mode))
43316 /* Return true if the registers in CLASS cannot represent the change from
43317 modes FROM to TO. */
/* CANNOT_CHANGE_MODE_CLASS: reject subreg mode changes the hardware
   cannot express.  The early size-equality check and final return are
   on elided lines.  */
43320 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
43321 enum reg_class regclass)
43326 /* x87 registers can't do subreg at all, as all values are reformatted
43327 to extended precision. */
43328 if (MAYBE_FLOAT_CLASS_P (regclass))
43331 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
43333 /* Vector registers do not support QI or HImode loads. If we don't
43334 disallow a change to these modes, reload will assume it's ok to
43335 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
43336 the vec_dupv4hi pattern. */
43337 if (GET_MODE_SIZE (from) < 4)
43344 /* Return the cost of moving data of mode M between a
43345 register and memory. A value of 2 is the default; this cost is
43346 relative to those in `REGISTER_MOVE_COST'.
43348 This function is used extensively by register_move_cost that is used to
43349 build tables at startup. Make it inline in this case.
43350 When IN is 2, return maximum of in and out move cost.
43352 If moving between registers and memory is more expensive than
43353 between two registers, you should define this macro to express the
43356 Model also increased moving costs of QImode registers in non
/* Cost of moving MODE between REGCLASS and memory; IN==2 asks for the
   max of the load and store cost.  Kept inline for register_move_cost
   table building.  NOTE(review): sampled listing -- the index
   computations and several switch labels are elided.  */
43360 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
/* x87 classes use the fp_load/fp_store tables.  */
43364 if (FLOAT_CLASS_P (regclass))
43382 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
43383 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
43385 if (SSE_CLASS_P (regclass))
43388 switch (GET_MODE_SIZE (mode))
43403 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
43404 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
43406 if (MMX_CLASS_P (regclass))
43409 switch (GET_MODE_SIZE (mode))
43421 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
43422 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer classes: size-1 moves model partial-register stalls.  */
43424 switch (GET_MODE_SIZE (mode))
43427 if (Q_CLASS_P (regclass) || TARGET_64BIT)
43430 return ix86_cost->int_store[0];
43431 if (TARGET_PARTIAL_REG_DEPENDENCY
43432 && optimize_function_for_speed_p (cfun))
43433 cost = ix86_cost->movzbl_load;
43435 cost = ix86_cost->int_load[0];
43437 return MAX (cost, ix86_cost->int_store[0]);
/* Non-Q classes pay a penalty (+4) for byte stores.  */
43443 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
43445 return ix86_cost->movzbl_load;
43447 return ix86_cost->int_store[0] + 4;
43452 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
43453 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
43455 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
43456 if (mode == TFmode)
43459 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
43461 cost = ix86_cost->int_load[2];
43463 cost = ix86_cost->int_store[2];
43464 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
/* TARGET_MEMORY_MOVE_COST wrapper; normalizes IN to 0/1 before
   delegating to inline_memory_move_cost.  */
43469 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
43472 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
43476 /* Return the cost of moving data from a register in class CLASS1 to
43477 one in class CLASS2.
43479 It is not required that the cost always equal 2 when FROM is the same as TO;
43480 on some machines it is expensive to move between registers if they are not
43481 general registers. */
/* TARGET_REGISTER_MOVE_COST for MODE between CLASS1 and CLASS2.
   NOTE(review): sampled listing -- the final returns of the secondary-
   memory branch and some constants are elided.  */
43484 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
43485 reg_class_t class2_i)
43487 enum reg_class class1 = (enum reg_class) class1_i;
43488 enum reg_class class2 = (enum reg_class) class2_i;
43490 /* In case we require secondary memory, compute cost of the store followed
43491 by load. In order to avoid bad register allocation choices, we need
43492 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
43494 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* IN==2 asks for max(load, store) for each side of the round trip.  */
43498 cost += inline_memory_move_cost (mode, class1, 2);
43499 cost += inline_memory_move_cost (mode, class2, 2);
43501 /* In case of copying from general_purpose_register we may emit multiple
43502 stores followed by single load causing memory size mismatch stall.
43503 Count this as arbitrarily high cost of 20. */
43504 if (targetm.class_max_nregs (class1, mode)
43505 > targetm.class_max_nregs (class2, mode))
43508 /* In the case of FP/MMX moves, the registers actually overlap, and we
43509 have to switch modes in order to treat them differently. */
43510 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
43511 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
43517 /* Moves between SSE/MMX and integer unit are expensive. */
43518 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
43519 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
43521 /* ??? By keeping returned value relatively high, we limit the number
43522 of moves between integer and MMX/SSE registers for all targets.
43523 Additionally, high value prevents problem with x86_modes_tieable_p(),
43524 where integer modes in MMX/SSE registers are not tieable
43525 because of missing QImode and HImode moves to, from or between
43526 MMX/SSE registers. */
43527 return MAX (8, ix86_cost->mmxsse_to_integer);
43529 if (MAYBE_FLOAT_CLASS_P (class1))
43530 return ix86_cost->fp_move;
43531 if (MAYBE_SSE_CLASS_P (class1))
43532 return ix86_cost->sse_move;
43533 if (MAYBE_MMX_CLASS_P (class1))
43534 return ix86_cost->mmx_move;
43538 /* Return TRUE if hard register REGNO can hold a value of machine-mode
/* HARD_REGNO_MODE_OK: can REGNO hold a value of MODE?  Checked per
   register bank: flags, x87 stack, mask, bound, SSE, MMX, then general
   purpose.  NOTE(review): sampled listing -- some returns/conditions
   between the visible rows are elided.  */
43542 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
43544 /* Flags and only flags can only hold CCmode values. */
43545 if (CC_REGNO_P (regno))
43546 return GET_MODE_CLASS (mode) == MODE_CC;
43547 if (GET_MODE_CLASS (mode) == MODE_CC
43548 || GET_MODE_CLASS (mode) == MODE_RANDOM
43549 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
43551 if (STACK_REGNO_P (regno))
43552 return VALID_FP_MODE_P (mode);
43553 if (MASK_REGNO_P (regno))
43554 return (VALID_MASK_REG_MODE (mode)
43555 || (TARGET_AVX512BW
43556 && VALID_MASK_AVX512BW_MODE (mode)));
43557 if (BND_REGNO_P (regno))
43558 return VALID_BND_REG_MODE (mode);
43559 if (SSE_REGNO_P (regno))
43561 /* We implement the move patterns for all vector modes into and
43562 out of SSE registers, even when no operation instructions
43565 /* For AVX-512 we allow, regardless of regno:
43567 - any of 512-bit wide vector mode
43568 - any scalar mode. */
43571 || VALID_AVX512F_REG_MODE (mode)
43572 || VALID_AVX512F_SCALAR_MODE (mode)))
43575 /* TODO check for QI/HI scalars. */
43576 /* AVX512VL allows sse regs16+ for 128/256 bit modes. */
43577 if (TARGET_AVX512VL
43580 || VALID_AVX256_REG_MODE (mode)
43581 || VALID_AVX512VL_128_REG_MODE (mode)))
43584 /* xmm16-xmm31 are only available for AVX-512. */
43585 if (EXT_REX_SSE_REGNO_P (regno))
43588 /* OImode and AVX modes are available only when AVX is enabled. */
43589 return ((TARGET_AVX
43590 && VALID_AVX256_REG_OR_OI_MODE (mode))
43591 || VALID_SSE_REG_MODE (mode)
43592 || VALID_SSE2_REG_MODE (mode)
43593 || VALID_MMX_REG_MODE (mode)
43594 || VALID_MMX_REG_MODE_3DNOW (mode));
43596 if (MMX_REGNO_P (regno))
43598 /* We implement the move patterns for 3DNOW modes even in MMX mode,
43599 so if the register is available at all, then we can move data of
43600 the given mode into or out of it. */
43601 return (VALID_MMX_REG_MODE (mode)
43602 || VALID_MMX_REG_MODE_3DNOW (mode));
43605 if (mode == QImode)
43607 /* Take care for QImode values - they can be in non-QI regs,
43608 but then they do cause partial register stalls. */
43609 if (ANY_QI_REGNO_P (regno))
43611 if (!TARGET_PARTIAL_REG_STALL)
43613 /* LRA checks if the hard register is OK for the given mode.
43614 QImode values can live in non-QI regs, so we allow all
43616 if (lra_in_progress)
43618 return !can_create_pseudo_p ();
43620 /* We handle both integer and floats in the general purpose registers. */
43621 else if (VALID_INT_MODE_P (mode))
43623 else if (VALID_FP_MODE_P (mode))
43625 else if (VALID_DFP_MODE_P (mode))
43627 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
43628 on to use that value in smaller contexts, this can easily force a
43629 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
43630 supporting DImode, allow it. */
43631 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
43637 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
43638 tieable integer mode. */
/* True if MODE is an integer mode that ties freely with others.
   NOTE(review): the switch head and case labels are elided in this
   sampled listing -- from the returns, one group of modes ties unless
   partial-register stalls matter (or on 64-bit), another only on
   64-bit; confirm the case labels against the full source.  */
43641 ix86_tieable_integer_mode_p (machine_mode mode)
43650 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
43653 return TARGET_64BIT;
43660 /* Return true if MODE1 is accessible in a register that can hold MODE2
43661 without copying. That is, all register classes that can hold MODE2
43662 can also hold MODE1. */
/* MODES_TIEABLE_P: can MODE1 live in any register that can hold MODE2?
   The final fallthrough return is on an elided line.  */
43665 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
43667 if (mode1 == mode2)
43670 if (ix86_tieable_integer_mode_p (mode1)
43671 && ix86_tieable_integer_mode_p (mode2))
43674 /* MODE2 being XFmode implies fp stack or general regs, which means we
43675 can tie any smaller floating point modes to it. Note that we do not
43676 tie this with TFmode. */
43677 if (mode2 == XFmode)
43678 return mode1 == SFmode || mode1 == DFmode;
43680 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
43681 that we can tie it with SFmode. */
43682 if (mode2 == DFmode)
43683 return mode1 == SFmode;
43685 /* If MODE2 is only appropriate for an SSE register, then tie with
43686 any other mode acceptable to SSE registers. */
/* Same-size SSE-only modes (32- or 16-byte) tie with each other.  */
43687 if (GET_MODE_SIZE (mode2) == 32
43688 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
43689 return (GET_MODE_SIZE (mode1) == 32
43690 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
43691 if (GET_MODE_SIZE (mode2) == 16
43692 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
43693 return (GET_MODE_SIZE (mode1) == 16
43694 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
43696 /* If MODE2 is appropriate for an MMX register, then tie
43697 with any other mode acceptable to MMX registers. */
43698 if (GET_MODE_SIZE (mode2) == 8
43699 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
43700 return (GET_MODE_SIZE (mode1) == 8
43701 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
43706 /* Return the cost of moving between two registers of mode MODE. */
/* Cost of a reg-reg SET of MODE: number of word-or-wider pieces the
   move splits into, in COSTS_N_INSNS units.  UNITS grows from a word
   to the full mode size when the target can move the whole mode in
   one instruction.  */
43709 ix86_set_reg_reg_cost (machine_mode mode)
43711 unsigned int units = UNITS_PER_WORD;
43713 switch (GET_MODE_CLASS (mode))
/* Condition-code moves are a single flag-sized piece.  */
43719 units = GET_MODE_SIZE (CCmode);
43723 if ((TARGET_SSE && mode == TFmode)
43724 || (TARGET_80387 && mode == XFmode)
43725 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
43726 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
43727 units = GET_MODE_SIZE (mode);
43730 case MODE_COMPLEX_FLOAT:
43731 if ((TARGET_SSE && mode == TCmode)
43732 || (TARGET_80387 && mode == XCmode)
43733 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
43734 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
43735 units = GET_MODE_SIZE (mode);
43738 case MODE_VECTOR_INT:
43739 case MODE_VECTOR_FLOAT:
43740 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
43741 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
43742 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
43743 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
43744 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
43745 units = GET_MODE_SIZE (mode);
43748 /* Return the cost of moving between two registers of mode MODE,
43749 assuming that the move will be in pieces of at most UNITS bytes. */
43750 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
43753 /* Compute a (partial) cost for rtx X. Return true if the complete
43754 cost has been computed, and false if subexpressions should be
43755 scanned. In either case, *TOTAL contains the cost result. */
/* NOTE(review): lossy extract -- this is the TARGET_RTX_COSTS hook body,
   but most of the "case CODE:" labels of its big switch, plus many
   braces and "return true/false;" lines, were dropped (see gaps in the
   embedded numbers).  Comments below mark where each visible fragment
   appears to belong; confirm against the full file before editing.  */
43758 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
43759 int *total, bool speed)
43762 enum rtx_code code = GET_CODE (x);
43763 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
/* Optimize-for-speed vs optimize-for-size cost tables.  */
43764 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
/* Fragment: SET of a register from a register-or-zero source.  */
43769 if (register_operand (SET_DEST (x), VOIDmode)
43770 && reg_or_0_operand (SET_SRC (x), VOIDmode))
43772 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
/* Fragment: constant costs (CONST_INT/SYMBOL_REF/LABEL_REF...).  */
43781 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
43783 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
43785 else if (flag_pic && SYMBOLIC_CONST (x)
43787 && (GET_CODE (x) == LABEL_REF
43788 || (GET_CODE (x) == SYMBOL_REF
43789 && SYMBOL_REF_LOCAL_P (x))))
43790 /* Use 0 cost for CONST to improve its propagation. */
43791 && (TARGET_64BIT || GET_CODE (x) != CONST))
43797 case CONST_WIDE_INT:
/* Fragment: floating-point constant costs via the x87 constant table.  */
43802 switch (standard_80387_constant_p (x))
43807 default: /* Other constants */
43814 if (SSE_FLOAT_MODE_P (mode))
43817 switch (standard_sse_constant_p (x))
43821 case 1: /* 0: xor eliminates false dependency */
43824 default: /* -1: cmp contains false dependency */
43829 /* Fall back to (MEM (SYMBOL_REF)), since that's where
43830 it'll probably end up. Add a penalty for size. */
43831 *total = (COSTS_N_INSNS (1)
43832 + (flag_pic != 0 && !TARGET_64BIT)
43833 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
43837 /* The zero extensions is often completely free on x86_64, so make
43838 it as cheap as possible. */
43839 if (TARGET_64BIT && mode == DImode
43840 && GET_MODE (XEXP (x, 0)) == SImode)
43842 else if (TARGET_ZERO_EXTEND_WITH_AND)
43843 *total = cost->add;
43845 *total = cost->movzx;
/* Fragment: SIGN_EXTEND case.  */
43849 *total = cost->movsx;
/* Fragment: ASHIFT (shift-by-constant) case.  */
43853 if (SCALAR_INT_MODE_P (mode)
43854 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
43855 && CONST_INT_P (XEXP (x, 1)))
43857 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
43860 *total = cost->add;
/* Shift by 2 or 3 can be done with LEA when LEA is as cheap.  */
43863 if ((value == 2 || value == 3)
43864 && cost->lea <= cost->shift_const)
43866 *total = cost->lea;
/* Fragment: generic shift/rotate costs.  */
43876 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
43878 /* ??? Should be SSE vector operation cost. */
43879 /* At least for published AMD latencies, this really is the same
43880 as the latency for a simple fpu operation like fabs. */
43881 /* V*QImode is emulated with 1-11 insns. */
43882 if (mode == V16QImode || mode == V32QImode)
43885 if (TARGET_XOP && mode == V16QImode)
43887 /* For XOP we use vpshab, which requires a broadcast of the
43888 value to the variable shift insn. For constants this
43889 means a V16Q const in mem; even when we can perform the
43890 shift with one insn set the cost to prefer paddb. */
43891 if (CONSTANT_P (XEXP (x, 1)))
43893 *total = (cost->fabs
43894 + rtx_cost (XEXP (x, 0), mode, code, 0, speed)
43895 + (speed ? 2 : COSTS_N_BYTES (16)));
43900 else if (TARGET_SSSE3)
43902 *total = cost->fabs * count;
43905 *total = cost->fabs;
/* Double-word scalar shifts.  */
43907 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
43909 if (CONST_INT_P (XEXP (x, 1)))
43911 if (INTVAL (XEXP (x, 1)) > 32)
43912 *total = cost->shift_const + COSTS_N_INSNS (2);
43914 *total = cost->shift_const * 2;
43918 if (GET_CODE (XEXP (x, 1)) == AND)
43919 *total = cost->shift_var * 2;
43921 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
43926 if (CONST_INT_P (XEXP (x, 1)))
43927 *total = cost->shift_const;
43928 else if (SUBREG_P (XEXP (x, 1))
43929 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
43931 /* Return the cost after shift-and truncation. */
43932 *total = cost->shift_var;
43936 *total = cost->shift_var;
/* Fragment: FMA case.  */
43944 gcc_assert (FLOAT_MODE_P (mode));
43945 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
43947 /* ??? SSE scalar/vector cost should be used here. */
43948 /* ??? Bald assumption that fma has the same cost as fmul. */
43949 *total = cost->fmul;
43950 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
43952 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
43954 if (GET_CODE (sub) == NEG)
43955 sub = XEXP (sub, 0);
43956 *total += rtx_cost (sub, mode, FMA, 0, speed);
43959 if (GET_CODE (sub) == NEG)
43960 sub = XEXP (sub, 0);
43961 *total += rtx_cost (sub, mode, FMA, 2, speed);
/* Fragment: MULT case.  */
43966 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
43968 /* ??? SSE scalar cost should be used here. */
43969 *total = cost->fmul;
43972 else if (X87_FLOAT_MODE_P (mode))
43974 *total = cost->fmul;
43977 else if (FLOAT_MODE_P (mode))
43979 /* ??? SSE vector cost should be used here. */
43980 *total = cost->fmul;
43983 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
43985 /* V*QImode is emulated with 7-13 insns. */
43986 if (mode == V16QImode || mode == V32QImode)
43989 if (TARGET_XOP && mode == V16QImode)
43991 else if (TARGET_SSSE3)
43993 *total = cost->fmul * 2 + cost->fabs * extra;
43995 /* V*DImode is emulated with 5-8 insns. */
43996 else if (mode == V2DImode || mode == V4DImode)
43998 if (TARGET_XOP && mode == V2DImode)
43999 *total = cost->fmul * 2 + cost->fabs * 3;
44001 *total = cost->fmul * 3 + cost->fabs * 5;
44003 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
44004 insns, including two PMULUDQ. */
44005 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
44006 *total = cost->fmul * 2 + cost->fabs * 5;
44008 *total = cost->fmul;
/* Scalar integer multiply: cost depends on popcount of a constant
   multiplier (NBITS) plus per-mode init cost.  */
44013 rtx op0 = XEXP (x, 0);
44014 rtx op1 = XEXP (x, 1);
44016 if (CONST_INT_P (XEXP (x, 1)))
44018 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
44019 for (nbits = 0; value != 0; value &= value - 1)
44023 /* This is arbitrary. */
44026 /* Compute costs correctly for widening multiplication. */
44027 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
44028 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
44029 == GET_MODE_SIZE (mode))
44031 int is_mulwiden = 0;
44032 machine_mode inner_mode = GET_MODE (op0);
44034 if (GET_CODE (op0) == GET_CODE (op1))
44035 is_mulwiden = 1, op1 = XEXP (op1, 0);
44036 else if (CONST_INT_P (op1))
44038 if (GET_CODE (op0) == SIGN_EXTEND)
44039 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
44042 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
44046 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
44049 *total = (cost->mult_init[MODE_INDEX (mode)]
44050 + nbits * cost->mult_bit
44051 + rtx_cost (op0, mode, outer_code, opno, speed)
44052 + rtx_cost (op1, mode, outer_code, opno, speed));
/* Fragment: DIV/MOD case.  */
44061 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44062 /* ??? SSE cost should be used here. */
44063 *total = cost->fdiv;
44064 else if (X87_FLOAT_MODE_P (mode))
44065 *total = cost->fdiv;
44066 else if (FLOAT_MODE_P (mode))
44067 /* ??? SSE vector cost should be used here. */
44068 *total = cost->fdiv;
44070 *total = cost->divide[MODE_INDEX (mode)];
/* Fragment: PLUS case -- recognize LEA-shaped address arithmetic.  */
44074 if (GET_MODE_CLASS (mode) == MODE_INT
44075 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
44077 if (GET_CODE (XEXP (x, 0)) == PLUS
44078 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
44079 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
44080 && CONSTANT_P (XEXP (x, 1)))
44082 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
44083 if (val == 2 || val == 4 || val == 8)
44085 *total = cost->lea;
44086 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
44087 outer_code, opno, speed);
44088 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
44089 outer_code, opno, speed);
44090 *total += rtx_cost (XEXP (x, 1), mode,
44091 outer_code, opno, speed);
44095 else if (GET_CODE (XEXP (x, 0)) == MULT
44096 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
44098 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
44099 if (val == 2 || val == 4 || val == 8)
44101 *total = cost->lea;
44102 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
44103 outer_code, opno, speed);
44104 *total += rtx_cost (XEXP (x, 1), mode,
44105 outer_code, opno, speed);
44109 else if (GET_CODE (XEXP (x, 0)) == PLUS)
44111 *total = cost->lea;
44112 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
44113 outer_code, opno, speed);
44114 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
44115 outer_code, opno, speed);
44116 *total += rtx_cost (XEXP (x, 1), mode,
44117 outer_code, opno, speed);
/* Fragment: MINUS/PLUS float handling.  */
44124 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44126 /* ??? SSE cost should be used here. */
44127 *total = cost->fadd;
44130 else if (X87_FLOAT_MODE_P (mode))
44132 *total = cost->fadd;
44135 else if (FLOAT_MODE_P (mode))
44137 /* ??? SSE vector cost should be used here. */
44138 *total = cost->fadd;
/* Fragment: AND/IOR/XOR on double-word integers.  */
44146 if (GET_MODE_CLASS (mode) == MODE_INT
44147 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
44149 *total = (cost->add * 2
44150 + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
44151 << (GET_MODE (XEXP (x, 0)) != DImode))
44152 + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
44153 << (GET_MODE (XEXP (x, 1)) != DImode)));
/* Fragment: NEG case.  */
44159 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44161 /* ??? SSE cost should be used here. */
44162 *total = cost->fchs;
44165 else if (X87_FLOAT_MODE_P (mode))
44167 *total = cost->fchs;
44170 else if (FLOAT_MODE_P (mode))
44172 /* ??? SSE vector cost should be used here. */
44173 *total = cost->fchs;
/* Fragment: NOT case.  */
44179 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
44181 /* ??? Should be SSE vector operation cost. */
44182 /* At least for published AMD latencies, this really is the same
44183 as the latency for a simple fpu operation like fabs. */
44184 *total = cost->fabs;
44186 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
44187 *total = cost->add * 2;
44189 *total = cost->add;
/* Fragment: COMPARE case.  */
44193 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
44194 && XEXP (XEXP (x, 0), 1) == const1_rtx
44195 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
44196 && XEXP (x, 1) == const0_rtx)
44198 /* This kind of construct is implemented using test[bwl].
44199 Treat it as if we had an AND. */
44200 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
44201 *total = (cost->add
44202 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code,
44204 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
44208 /* The embedded comparison operand is completely free. */
44209 if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
44210 && XEXP (x, 1) == const0_rtx)
44216 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
/* Fragment: ABS case.  */
44221 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44222 /* ??? SSE cost should be used here. */
44223 *total = cost->fabs;
44224 else if (X87_FLOAT_MODE_P (mode))
44225 *total = cost->fabs;
44226 else if (FLOAT_MODE_P (mode))
44227 /* ??? SSE vector cost should be used here. */
44228 *total = cost->fabs;
/* Fragment: SQRT case.  */
44232 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44233 /* ??? SSE cost should be used here. */
44234 *total = cost->fsqrt;
44235 else if (X87_FLOAT_MODE_P (mode))
44236 *total = cost->fsqrt;
44237 else if (FLOAT_MODE_P (mode))
44238 /* ??? SSE vector cost should be used here. */
44239 *total = cost->fsqrt;
/* Fragment: UNSPEC (thread pointer) case.  */
44243 if (XINT (x, 1) == UNSPEC_TP)
44249 case VEC_DUPLICATE:
44250 /* ??? Assume all of these vector manipulation patterns are
44251 recognizable. In which case they all pretty much have the
44253 *total = cost->fabs;
/* Fragment: VEC_MERGE (possibly masked AVX-512 operation).  */
44256 mask = XEXP (x, 2);
44257 /* This is masked instruction, assume the same cost,
44258 as nonmasked variant. */
44259 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
44260 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
44262 *total = cost->fabs;
/* Monotonic counter for Mach-O stub labels (used as "L<N>$lz" and
   "LPC$<N>" below); incremented once per stub emitted.  */
44272 static int current_machopic_label_num;
44274 /* Given a symbol name and its associated stub, write out the
44275 definition of the stub. */
/* NOTE(review): Darwin (Mach-O) only; the enclosing "#if TARGET_MACHO"
   is outside this extract -- only the closing #endif (44361) is
   visible.  The extract is lossy: some braces/blank lines between the
   visible statements were dropped.  */
44278 machopic_output_stub (FILE *file, const char *symb, const char *stub)
44280 unsigned int length;
44281 char *binder_name, *symbol_name, lazy_ptr_name[32];
44282 int label = ++current_machopic_label_num;
44284 /* For 64-bit we shouldn't get here. */
44285 gcc_assert (!TARGET_64BIT);
44287 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
44288 symb = targetm.strip_name_encoding (symb);
44290 length = strlen (stub);
44291 binder_name = XALLOCAVEC (char, length + 32);
44292 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
44294 length = strlen (symb);
44295 symbol_name = XALLOCAVEC (char, length + 32);
44296 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
44298 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Pick the stub section matching the PIC style.  */
44300 if (MACHOPIC_ATT_STUB)
44301 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
44302 else if (MACHOPIC_PURE)
44303 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
44305 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
44307 fprintf (file, "%s:\n", stub);
44308 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
44310 if (MACHOPIC_ATT_STUB)
/* AT&T-style stub body is filled in by the linker; emit hlt padding.  */
44312 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
44314 else if (MACHOPIC_PURE)
44317 /* 25-byte PIC stub using "CALL get_pc_thunk". */
44318 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
44319 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
44320 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
44321 label, lazy_ptr_name, label);
44322 fprintf (file, "\tjmp\t*%%ecx\n");
44325 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
44327 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
44328 it needs no stub-binding-helper. */
44329 if (MACHOPIC_ATT_STUB)
44332 fprintf (file, "%s:\n", binder_name);
44336 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
44337 fprintf (file, "\tpushl\t%%ecx\n");
44340 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
44342 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
44344 /* N.B. Keep the correspondence of these
44345 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
44346 old-pic/new-pic/non-pic stubs; altering this will break
44347 compatibility with existing dylibs. */
44350 /* 25-byte PIC stub using "CALL get_pc_thunk". */
44351 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
44354 /* 16-byte -mdynamic-no-pic stub. */
44355 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
44357 fprintf (file, "%s:\n", lazy_ptr_name);
44358 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
44359 fprintf (file, ASM_LONG "%s\n", binder_name);
44361 #endif /* TARGET_MACHO */
44363 /* Order the registers for register allocator. */
/* Fills reg_alloc_order[] so the allocator prefers call-clobbered GPRs,
   then call-saved GPRs, then (depending on TARGET_SSE_MATH) x87 before
   or after the SSE/mask/bound register files.  NOTE(review): lossy
   extract -- local declarations of i/pos and some braces are missing.  */
44366 x86_order_regs_for_local_alloc (void)
44371 /* First allocate the local general purpose registers. */
44372 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
44373 if (GENERAL_REGNO_P (i) && call_used_regs[i])
44374 reg_alloc_order [pos++] = i;
44376 /* Global general purpose registers. */
44377 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
44378 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
44379 reg_alloc_order [pos++] = i;
44381 /* x87 registers come first in case we are doing FP math
44383 if (!TARGET_SSE_MATH)
44384 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
44385 reg_alloc_order [pos++] = i;
44387 /* SSE registers. */
44388 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
44389 reg_alloc_order [pos++] = i;
44390 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
44391 reg_alloc_order [pos++] = i;
44393 /* Extended REX SSE registers. */
44394 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
44395 reg_alloc_order [pos++] = i;
44397 /* Mask register. */
44398 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
44399 reg_alloc_order [pos++] = i;
44401 /* MPX bound registers. */
44402 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
44403 reg_alloc_order [pos++] = i;
44405 /* x87 registers. */
44406 if (TARGET_SSE_MATH)
44407 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
44408 reg_alloc_order [pos++] = i;
44410 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
44411 reg_alloc_order [pos++] = i;
44413 /* Initialize the rest of array as we do not allocate some registers
44415 while (pos < FIRST_PSEUDO_REGISTER)
44416 reg_alloc_order [pos++] = 0;
44419 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
44420 in struct attribute_spec handler. */
/* Validates that the attribute is applied to a function type and that
   its single argument is the integer constant 0 or 1; on any failure
   warns and sets *no_add_attrs.  NOTE(review): lossy extract -- the
   parameter list is split across missing lines (args/flags params not
   visible) and several braces/returns are dropped.  */
44422 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
44425 bool *no_add_attrs)
44427 if (TREE_CODE (*node) != FUNCTION_TYPE
44428 && TREE_CODE (*node) != METHOD_TYPE
44429 && TREE_CODE (*node) != FIELD_DECL
44430 && TREE_CODE (*node) != TYPE_DECL)
44432 warning (OPT_Wattributes, "%qE attribute only applies to functions",
44434 *no_add_attrs = true;
/* Attribute is only meaningful for the 32-bit calling conventions.  */
44439 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
44441 *no_add_attrs = true;
44444 if (is_attribute_p ("callee_pop_aggregate_return", name))
44448 cst = TREE_VALUE (args);
44449 if (TREE_CODE (cst) != INTEGER_CST)
44451 warning (OPT_Wattributes,
44452 "%qE attribute requires an integer constant argument",
44454 *no_add_attrs = true;
44456 else if (compare_tree_int (cst, 0) != 0
44457 && compare_tree_int (cst, 1) != 0)
44459 warning (OPT_Wattributes,
44460 "argument to %qE attribute is neither zero, nor one",
44462 *no_add_attrs = true;
44471 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
44472 struct attribute_spec.handler. */
/* Rejects non-function targets and the mutually-exclusive combination
   of ms_abi with sysv_abi (in either order).  */
44474 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
44475 bool *no_add_attrs)
44477 if (TREE_CODE (*node) != FUNCTION_TYPE
44478 && TREE_CODE (*node) != METHOD_TYPE
44479 && TREE_CODE (*node) != FIELD_DECL
44480 && TREE_CODE (*node) != TYPE_DECL)
44482 warning (OPT_Wattributes, "%qE attribute only applies to functions",
44484 *no_add_attrs = true;
44488 /* Can combine regparm with all attributes but fastcall. */
44489 if (is_attribute_p ("ms_abi", name))
44491 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
44493 error ("ms_abi and sysv_abi attributes are not compatible");
44498 else if (is_attribute_p ("sysv_abi", name))
44500 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
44502 error ("ms_abi and sysv_abi attributes are not compatible");
44511 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
44512 struct attribute_spec.handler. */
/* Resolves the relevant type (TREE_TYPE for a TYPE_DECL), then rejects
   non-record/union targets and the ms_struct/gcc_struct combination.  */
44514 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
44515 bool *no_add_attrs)
44518 if (DECL_P (*node))
44520 if (TREE_CODE (*node) == TYPE_DECL)
44521 type = &TREE_TYPE (*node);
44526 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
44528 warning (OPT_Wattributes, "%qE attribute ignored",
44530 *no_add_attrs = true;
44533 else if ((is_attribute_p ("ms_struct", name)
44534 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
44535 || ((is_attribute_p ("gcc_struct", name)
44536 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
44538 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
44540 *no_add_attrs = true;
/* Attribute handler requiring the target to be a FUNCTION_DECL;
   otherwise warn and drop the attribute via *no_add_attrs.  */
44547 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
44548 bool *no_add_attrs)
44550 if (TREE_CODE (*node) != FUNCTION_DECL)
44552 warning (OPT_Wattributes, "%qE attribute only applies to functions",
44554 *no_add_attrs = true;
/* Return whether RECORD_TYPE should use MS bitfield layout: either the
   target default requests it (and "gcc_struct" doesn't override), or
   the type carries an explicit "ms_struct" attribute.  */
44560 ix86_ms_bitfield_layout_p (const_tree record_type)
44562 return ((TARGET_MS_BITFIELD_LAYOUT
44563 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
44564 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
44567 /* Returns an expression indicating where the this parameter is
44568 located on entry to the FUNCTION. */
/* 64-bit: "this" is the first integer parameter register (second if the
   return value is passed as a hidden aggregate pointer).  32-bit: a
   regparm/fastcall/thiscall register, else a stack slot.  NOTE(review):
   lossy extract -- the TARGET_64BIT test and several braces are
   missing between the visible lines.  */
44571 x86_this_parameter (tree function)
44573 tree type = TREE_TYPE (function);
/* AGGR selects parm_regs[1] when a hidden return pointer occupies the
   first parameter register.  */
44574 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
44579 const int *parm_regs;
44581 if (ix86_function_type_abi (type) == MS_ABI)
44582 parm_regs = x86_64_ms_abi_int_parameter_registers;
44584 parm_regs = x86_64_int_parameter_registers;
44585 return gen_rtx_REG (Pmode, parm_regs[aggr]);
44588 nregs = ix86_function_regparm (type, function);
44590 if (nregs > 0 && !stdarg_p (type))
44593 unsigned int ccvt = ix86_get_callcvt (type);
44595 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
44596 regno = aggr ? DX_REG : CX_REG;
44597 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
44601 return gen_rtx_MEM (SImode,
44602 plus_constant (Pmode, stack_pointer_rtx, 4));
44611 return gen_rtx_MEM (SImode,
44612 plus_constant (Pmode,
44613 stack_pointer_rtx, 4));
44616 return gen_rtx_REG (SImode, regno);
44619 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
44623 /* Determine whether x86_output_mi_thunk can succeed. */
/* NOTE(review): lossy extract -- the "return true;"/"return false;"
   statements between these conditions are missing.  */
44626 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
44627 const_tree function)
44629 /* 64-bit can handle anything. */
44633 /* For 32-bit, everything's fine if we have one free register. */
44634 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
44637 /* Need a free register for vcall_offset. */
44641 /* Need a free register for GOT references. */
44642 if (flag_pic && !targetm.binds_local_p (function))
44645 /* Otherwise ok. */
44649 /* Output the assembler code for a thunk function. THUNK_DECL is the
44650 declaration for the thunk function itself, FUNCTION is the decl for
44651 the target function. DELTA is an immediate constant offset to be
44652 added to THIS. If VCALL_OFFSET is nonzero, the word at
44653 *(*this + vcall_offset) should be added to THIS. */
/* TARGET_ASM_OUTPUT_MI_THUNK hook: adjust "this", then tail-call
   FUNCTION.  NOTE(review): lossy extract -- the TARGET_64BIT tests,
   several braces and a few statements are missing between the visible
   lines; confirm against the full file before editing.  */
44656 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
44657 HOST_WIDE_INT vcall_offset, tree function)
44659 rtx this_param = x86_this_parameter (function);
44660 rtx this_reg, tmp, fnaddr;
44661 unsigned int tmp_regno;
/* Scratch register: R10 on 64-bit; on 32-bit chosen to avoid the
   registers carrying "this" under fastcall/thiscall.  */
44665 tmp_regno = R10_REG;
44668 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
44669 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
44670 tmp_regno = AX_REG;
44671 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
44672 tmp_regno = DX_REG;
44674 tmp_regno = CX_REG;
44677 emit_note (NOTE_INSN_PROLOGUE_END);
44679 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
44680 pull it in now and let DELTA benefit. */
44681 if (REG_P (this_param))
44682 this_reg = this_param;
44683 else if (vcall_offset)
44685 /* Put the this parameter into %eax. */
44686 this_reg = gen_rtx_REG (Pmode, AX_REG);
44687 emit_move_insn (this_reg, this_param);
44690 this_reg = NULL_RTX;
44692 /* Adjust the this parameter by a fixed constant. */
44695 rtx delta_rtx = GEN_INT (delta);
44696 rtx delta_dst = this_reg ? this_reg : this_param;
/* Large deltas that are no valid 64-bit immediate go via the scratch.  */
44700 if (!x86_64_general_operand (delta_rtx, Pmode))
44702 tmp = gen_rtx_REG (Pmode, tmp_regno);
44703 emit_move_insn (tmp, delta_rtx);
44708 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
44711 /* Adjust the this parameter by a value stored in the vtable. */
44714 rtx vcall_addr, vcall_mem, this_mem;
44716 tmp = gen_rtx_REG (Pmode, tmp_regno);
44718 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
44719 if (Pmode != ptr_mode)
44720 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
44721 emit_move_insn (tmp, this_mem);
44723 /* Adjust the this parameter. */
44724 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
44726 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
44728 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
44729 emit_move_insn (tmp2, GEN_INT (vcall_offset));
44730 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
44733 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
44734 if (Pmode != ptr_mode)
44735 emit_insn (gen_addsi_1_zext (this_reg,
44736 gen_rtx_REG (ptr_mode,
44740 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
44743 /* If necessary, drop THIS back to its stack slot. */
44744 if (this_reg && this_reg != this_param)
44745 emit_move_insn (this_param, this_reg);
44747 fnaddr = XEXP (DECL_RTL (function), 0);
/* Materialize the callee address: direct when local, otherwise through
   the GOT (GOTPCREL on 64-bit, @GOT + set_got on 32-bit PIC).  */
44750 if (!flag_pic || targetm.binds_local_p (function)
44755 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
44756 tmp = gen_rtx_CONST (Pmode, tmp);
44757 fnaddr = gen_const_mem (Pmode, tmp);
44762 if (!flag_pic || targetm.binds_local_p (function))
44765 else if (TARGET_MACHO)
44767 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
44768 fnaddr = XEXP (fnaddr, 0);
44770 #endif /* TARGET_MACHO */
44773 tmp = gen_rtx_REG (Pmode, CX_REG);
44774 output_set_got (tmp, NULL_RTX);
44776 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
44777 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
44778 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
44779 fnaddr = gen_const_mem (Pmode, fnaddr);
44783 /* Our sibling call patterns do not allow memories, because we have no
44784 predicate that can distinguish between frame and non-frame memory.
44785 For our purposes here, we can get away with (ab)using a jump pattern,
44786 because we're going to do no optimization. */
44787 if (MEM_P (fnaddr))
44789 if (sibcall_insn_operand (fnaddr, word_mode))
44791 fnaddr = XEXP (DECL_RTL (function), 0);
44792 tmp = gen_rtx_MEM (QImode, fnaddr);
44793 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
44794 tmp = emit_call_insn (tmp);
44795 SIBLING_CALL_P (tmp) = 1;
44798 emit_jump_insn (gen_indirect_jump (fnaddr));
44802 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
44804 // CM_LARGE_PIC always uses pseudo PIC register which is
44805 // uninitialized. Since FUNCTION is local and calling it
44806 // doesn't go through PLT, we use scratch register %r11 as
44807 // PIC register and initialize it here.
44808 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
44809 ix86_init_large_pic_reg (tmp_regno);
44810 fnaddr = legitimize_pic_address (fnaddr,
44811 gen_rtx_REG (Pmode, tmp_regno));
44814 if (!sibcall_insn_operand (fnaddr, word_mode))
44816 tmp = gen_rtx_REG (word_mode, tmp_regno);
44817 if (GET_MODE (fnaddr) != word_mode)
44818 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
44819 emit_move_insn (tmp, fnaddr);
44823 tmp = gen_rtx_MEM (QImode, fnaddr);
44824 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
44825 tmp = emit_call_insn (tmp);
44826 SIBLING_CALL_P (tmp) = 1;
44830 /* Emit just enough of rest_of_compilation to get the insns emitted.
44831 Note that use_thunk calls assemble_start_function et al. */
44832 insn = get_insns ();
44833 shorten_branches (insn);
44834 final_start_function (insn, file, 1);
44835 final (insn, file, 1);
44836 final_end_function ();
/* TARGET_ASM_FILE_START hook: emit per-file directives (.code16gcc,
   Darwin prologue, .version, __fltused, Intel syntax) as configured.
   NOTE(review): lossy extract -- the conditions guarding the
   .code16gcc and darwin_file_start lines are missing.  */
44840 x86_file_start (void)
44842 default_file_start ();
44844 fputs ("\t.code16gcc\n", asm_out_file);
44846 darwin_file_start ();
44848 if (X86_FILE_START_VERSION_DIRECTIVE)
44849 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
44850 if (X86_FILE_START_FLTUSED)
44851 fputs ("\t.global\t__fltused\n", asm_out_file);
44852 if (ix86_asm_dialect == ASM_INTEL)
44853 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* ADJUST_FIELD_ALIGN worker: cap the alignment of DFmode/DCmode and
   integer-class fields at 32 bits for the classic 32-bit i386 ABI;
   64-bit, -malign-double and IAMCU take other paths.  */
44857 x86_field_alignment (tree field, int computed)
44860 tree type = TREE_TYPE (field);
44862 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
44865 return iamcu_alignment (type, computed);
/* Strip array dimensions so an array of doubles is treated like its
   element type.  */
44866 mode = TYPE_MODE (strip_array_types (type));
44867 if (mode == DFmode || mode == DCmode
44868 || GET_MODE_CLASS (mode) == MODE_INT
44869 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
44870 return MIN (32, computed);
44874 /* Print call to TARGET to FILE. */
/* With -mnop-mcount, substitute a 5-byte nop of the same length as the
   call so the profiler site can be patched in later.  */
44877 x86_print_call_or_nop (FILE *file, const char *target)
44879 if (flag_nop_mcount)
44880 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
44882 fprintf (file, "1:\tcall\t%s\n", target);
44885 /* Output assembler code to FILE to increment profiler label # LABELNO
44886 for profiling a function entry. */
/* Emits the mcount call (PIC or direct, 32/64-bit variants) and, with
   -mrecord-mcount, a __mcount_loc entry pointing at it.
   NOTE(review): lossy extract -- the TARGET_64BIT/flag_pic branch
   structure and #endif lines are partly missing.  */
44888 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
44890 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
44894 #ifndef NO_PROFILE_COUNTERS
44895 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
44898 if (!TARGET_PECOFF && flag_pic)
44899 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
44901 x86_print_call_or_nop (file, mcount_name);
44905 #ifndef NO_PROFILE_COUNTERS
44906 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
44909 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
44913 #ifndef NO_PROFILE_COUNTERS
44914 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
44917 x86_print_call_or_nop (file, mcount_name);
44920 if (flag_record_mcount)
44922 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
44923 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
44924 fprintf (file, "\t.previous\n");
44928 /* We don't have exact information about the insn sizes, but we may assume
44929 quite safely that we are informed about all 1 byte insns and memory
44930 address sizes. This is enough to eliminate unnecessary padding in
/* Returns a conservative lower bound on INSN's encoded length, used by
   ix86_avoid_jump_mispredicts below.  NOTE(review): lossy extract --
   several "return"s and the local declarations are missing.  */
44934 min_insn_size (rtx_insn *insn)
44938 if (!INSN_P (insn) || !active_insn_p (insn))
44941 /* Discard alignments we've emit and jump instructions. */
44942 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
44943 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
44946 /* Important case - calls are always 5 bytes.
44947 It is common to have many calls in the row. */
44949 && symbolic_reference_mentioned_p (PATTERN (insn))
44950 && !SIBLING_CALL_P (insn))
44952 len = get_attr_length (insn);
44956 /* For normal instructions we rely on get_attr_length being exact,
44957 with a few exceptions. */
44958 if (!JUMP_P (insn))
44960 enum attr_type type = get_attr_type (insn);
/* Inline asm length estimates are unreliable; treat specially.  */
44965 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
44966 || asm_noperands (PATTERN (insn)) >= 0)
44973 /* Otherwise trust get_attr_length. */
44977 l = get_attr_length_address (insn);
44978 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
44987 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
44989 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Sliding-window pass: track the byte size and jump count of the
   current window [START, INSN]; when a 4th jump would land in the same
   16-byte page, emit a "pad" insn before it.  NOTE(review): lossy
   extract -- local declarations, some braces and a few statements are
   missing between the visible lines.  */
44993 ix86_avoid_jump_mispredicts (void)
44995 rtx_insn *insn, *start = get_insns ();
44996 int nbytes = 0, njumps = 0;
44997 bool isjump = false;
44999 /* Look for all minimal intervals of instructions containing 4 jumps.
45000 The intervals are bounded by START and INSN. NBYTES is the total
45001 size of instructions in the interval including INSN and not including
45002 START. When the NBYTES is smaller than 16 bytes, it is possible
45003 that the end of START and INSN ends up in the same 16byte page.
45005 The smallest offset in the page INSN can start is the case where START
45006 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
45007 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
45009 Don't consider asm goto as jump, while it can contain a jump, it doesn't
45010 have to, control transfer to label(s) can be performed through other
45011 means, and also we estimate minimum length of all asm stmts as 0. */
45012 for (insn = start; insn; insn = NEXT_INSN (insn))
45016 if (LABEL_P (insn))
45018 int align = label_to_alignment (insn);
45019 int max_skip = label_to_max_skip (insn);
45023 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
45024 already in the current 16 byte page, because otherwise
45025 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
45026 bytes to reach 16 byte boundary. */
45028 || (align <= 3 && max_skip != (1 << align) - 1))
45031 fprintf (dump_file, "Label %i with max_skip %i\n",
45032 INSN_UID (insn), max_skip);
/* Shrink the window from the front until the aligned label fits.  */
45035 while (nbytes + max_skip >= 16)
45037 start = NEXT_INSN (start);
45038 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
45040 njumps--, isjump = true;
45043 nbytes -= min_insn_size (start);
45049 min_size = min_insn_size (insn);
45050 nbytes += min_size;
45052 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
45053 INSN_UID (insn), min_size);
45054 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
/* Too many jumps in the window: advance START past old jumps.  */
45062 start = NEXT_INSN (start);
45063 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
45065 njumps--, isjump = true;
45068 nbytes -= min_insn_size (start);
45070 gcc_assert (njumps >= 0);
45072 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
45073 INSN_UID (start), INSN_UID (insn), nbytes);
45075 if (njumps == 3 && isjump && nbytes < 16)
45077 int padsize = 15 - nbytes + min_insn_size (insn);
45080 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
45081 INSN_UID (insn), padsize);
45082 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
45088 /* AMD Athlon works faster
45089 when RET is not destination of conditional jump or directly preceded
45090 by other jump instruction. We avoid the penalty by inserting NOP just
45091 before the RET instructions in such cases. */
/* NOTE(review): excerpt is subsampled; some lines elided.  Walks every
   predecessor of the exit block and, when the return is reached via a
   jump, replaces/precedes it with a longer return sequence.  */
45093 ix86_pad_returns (void)
45098 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
45100 basic_block bb = e->src;
45101 rtx_insn *ret = BB_END (bb);
45103 bool replace = false;
45105 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
45106 || optimize_bb_for_size_p (bb))
/* Find the previous active insn or label before the return.  */
45108 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
45109 if (active_insn_p (prev) || LABEL_P (prev))
/* RET directly follows a label: risky only if some predecessor reaches
   the label by an actual (non-fallthru) jump.  */
45111 if (prev && LABEL_P (prev))
45116 FOR_EACH_EDGE (e, ei, bb->preds)
45117 if (EDGE_FREQUENCY (e) && e->src->index >= 0
45118 && !(e->flags & EDGE_FALLTHRU))
45126 prev = prev_active_insn (ret);
45128 && ((JUMP_P (prev) && any_condjump_p (prev))
45131 /* Empty functions get branch mispredict even when
45132 the jump destination is not visible to us. */
45133 if (!prev && !optimize_function_for_size_p (cfun))
/* Emit the long form of the return, avoiding the mispredict penalty.  */
45138 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
45144 /* Count the minimum number of instructions in BB. Return 4 if the
45145 number of instructions >= 4. */
/* NOTE(review): excerpt is subsampled; braces and the early-return for
   return-insns are partly elided.  */
45148 ix86_count_insn_bb (basic_block bb)
45151 int insn_count = 0;
45153 /* Count number of instructions in this block. Return 4 if the number
45154 of instructions >= 4. */
45155 FOR_BB_INSNS (bb, insn)
45157 /* Only happen in exit blocks. */
45159 && ANY_RETURN_P (PATTERN (insn)))
/* USE/CLOBBER and debug insns do not produce code; skip them.  */
45162 if (NONDEBUG_INSN_P (insn)
45163 && GET_CODE (PATTERN (insn)) != USE
45164 && GET_CODE (PATTERN (insn)) != CLOBBER)
/* Saturate at 4 -- callers only care about "fewer than 4".  */
45167 if (insn_count >= 4)
45176 /* Count the minimum number of instructions in code path in BB.
45177 Return 4 if the number of instructions >= 4. */
45180 ix86_count_insn (basic_block bb)
45184 int min_prev_count;
45186 /* Only bother counting instructions along paths with no
45187 more than 2 basic blocks between entry and exit. Given
45188 that BB has an edge to exit, determine if a predecessor
45189 of BB has an edge from entry. If so, compute the number
45190 of instructions in the predecessor block. If there
45191 happen to be multiple such blocks, compute the minimum. */
45192 min_prev_count = 4;
45193 FOR_EACH_EDGE (e, ei, bb->preds)
45196 edge_iterator prev_ei;
/* BB itself is reached straight from entry: no predecessor insns.  */
45198 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
45200 min_prev_count = 0;
/* Otherwise count the predecessor only if it in turn hangs off entry
   (paths of at most two blocks, per the comment above).  */
45203 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
45205 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
45207 int count = ix86_count_insn_bb (e->src);
45208 if (count < min_prev_count)
45209 min_prev_count = count;
/* Add BB's own insns unless the path is already known to be >= 4.  */
45215 if (min_prev_count < 4)
45216 min_prev_count += ix86_count_insn_bb (bb);
45218 return min_prev_count;
45221 /* Pad short function to 4 instructions. */
/* NOTE(review): excerpt is subsampled; some lines elided.  For each
   returning block, NOPs are inserted before the epilogue so the whole
   path from entry holds at least 4 instructions.  */
45224 ix86_pad_short_function (void)
45229 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
45231 rtx_insn *ret = BB_END (e->src);
45232 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
45234 int insn_count = ix86_count_insn (e->src);
45236 /* Pad short function. */
45237 if (insn_count < 4)
45239 rtx_insn *insn = ret;
45241 /* Find epilogue. */
45244 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
45245 insn = PREV_INSN (insn);
45250 /* Two NOPs count as one instruction. */
45251 insn_count = 2 * (4 - insn_count);
45252 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
45258 /* Fix up a Windows system unwinder issue. If an EH region falls through into
45259 the epilogue, the Windows system unwinder will apply epilogue logic and
45260 produce incorrect offsets. This can be avoided by adding a nop between
45261 the last insn that can throw and the first insn of the epilogue. */
45264 ix86_seh_fixup_eh_fallthru (void)
45269 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
45271 rtx_insn *insn, *next;
45273 /* Find the beginning of the epilogue. */
45274 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
45275 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
45280 /* We only care about preceding insns that can throw. */
45281 insn = prev_active_insn (insn);
45282 if (insn == NULL || !can_throw_internal (insn))
45285 /* Do not separate calls from their debug information. */
/* Advance past var-location / call-arg-location notes so the NOP lands
   after the debug notes attached to the throwing call.  */
45286 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
45288 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
45289 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
/* A single NOP separates the last throwing insn from the epilogue.  */
45294 emit_insn_after (gen_nops (const1_rtx), insn);
45298 /* Given a register number BASE, the lowest of a group of registers, update
45299 regsets IN and OUT with the registers that should be avoided in input
45300 and output operands respectively when trying to avoid generating a modr/m
45301 byte for -fmitigate-rop. */
45304 set_rop_modrm_reg_bits (int base, HARD_REG_SET &in, HARD_REG_SET &out)
/* The first two registers of the group are risky as outputs, the next two
   as inputs.  NOTE(review): presumably this mirrors which register
   encodings make the modr/m byte a ROP gadget -- confirm against
   ix86_get_modrm_for_rop.  */
45306 SET_HARD_REG_BIT (out, base);
45307 SET_HARD_REG_BIT (out, base + 1);
45308 SET_HARD_REG_BIT (in, base + 2);
45309 SET_HARD_REG_BIT (in, base + 3);
45312 /* Called if -fmitigate_rop is in effect. Try to rewrite instructions so
45313 that certain encodings of modr/m bytes do not occur. */
/* NOTE(review): excerpt is subsampled; braces, some declarations and a few
   statements are elided.  Two phases are visible: (1) use the regrename
   infrastructure to rename whole chains away from risky registers, then
   (2) patch remaining insns individually by copying an input operand into
   a freshly chosen non-risky register.  */
45315 ix86_mitigate_rop (void)
45317 HARD_REG_SET input_risky;
45318 HARD_REG_SET output_risky;
45319 HARD_REG_SET inout_risky;
/* Build the sets of registers whose encodings are risky when used as
   inputs resp. outputs.  */
45321 CLEAR_HARD_REG_SET (output_risky);
45322 CLEAR_HARD_REG_SET (input_risky);
45323 SET_HARD_REG_BIT (output_risky, AX_REG);
45324 SET_HARD_REG_BIT (output_risky, CX_REG);
45325 SET_HARD_REG_BIT (input_risky, BX_REG);
45326 SET_HARD_REG_BIT (input_risky, DX_REG);
45327 set_rop_modrm_reg_bits (FIRST_SSE_REG, input_risky, output_risky);
45328 set_rop_modrm_reg_bits (FIRST_REX_INT_REG, input_risky, output_risky);
45329 set_rop_modrm_reg_bits (FIRST_REX_SSE_REG, input_risky, output_risky);
45330 set_rop_modrm_reg_bits (FIRST_EXT_REX_SSE_REG, input_risky, output_risky);
45331 set_rop_modrm_reg_bits (FIRST_MASK_REG, input_risky, output_risky);
45332 set_rop_modrm_reg_bits (FIRST_BND_REG, input_risky, output_risky);
45333 COPY_HARD_REG_SET (inout_risky, input_risky);
45334 IOR_HARD_REG_SET (inout_risky, output_risky);
45336 df_note_add_problem ();
45337 /* Fix up what stack-regs did. */
45338 df_insn_rescan_all ();
45341 regrename_init (true);
45342 regrename_analyze (NULL);
45344 auto_vec<du_head_p> cands;
/* Phase 1: collect rename candidates -- chains whose operands occur in a
   risky modr/m position.  */
45346 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
45348 if (!NONDEBUG_INSN_P (insn))
45351 if (GET_CODE (PATTERN (insn)) == USE
45352 || GET_CODE (PATTERN (insn)) == CLOBBER)
45355 extract_insn (insn);
45358 int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
45359 recog_data.n_operands, &opno0,
45362 if (!ix86_rop_should_change_byte_p (modrm))
45365 insn_rr_info *info = &insn_rr[INSN_UID (insn)];
45367 /* This happens when regrename has to fail a block. */
45368 if (!info->op_info)
45371 if (info->op_info[opno0].n_chains != 0)
45373 gcc_assert (info->op_info[opno0].n_chains == 1);
45375 op0c = regrename_chain_from_id (info->op_info[opno0].heads[0]->id);
/* target_data_1/2 count output resp. input risky occurrences; a chain is
   pushed as a candidate the first time either counter goes nonzero.  */
45376 if (op0c->target_data_1 + op0c->target_data_2 == 0
45377 && !op0c->cannot_rename)
45378 cands.safe_push (op0c);
45380 op0c->target_data_1++;
45382 if (info->op_info[opno1].n_chains != 0)
45384 gcc_assert (info->op_info[opno1].n_chains == 1);
45386 op1c = regrename_chain_from_id (info->op_info[opno1].heads[0]->id);
45387 if (op1c->target_data_1 + op1c->target_data_2 == 0
45388 && !op1c->cannot_rename)
45389 cands.safe_push (op1c);
45391 op1c->target_data_2++;
/* Rename each candidate chain into a register outside the risky sets.  */
45397 FOR_EACH_VEC_ELT (cands, i, head)
45399 int old_reg, best_reg;
45400 HARD_REG_SET unavailable;
45402 CLEAR_HARD_REG_SET (unavailable);
45403 if (head->target_data_1)
45404 IOR_HARD_REG_SET (unavailable, output_risky);
45405 if (head->target_data_2)
45406 IOR_HARD_REG_SET (unavailable, input_risky);
45409 reg_class superclass = regrename_find_superclass (head, &n_uses,
45411 old_reg = head->regno;
45412 best_reg = find_rename_reg (head, superclass, &unavailable,
45414 bool ok = regrename_do_replace (head, best_reg);
45417 fprintf (dump_file, "Chain %d renamed as %s in %s\n", head->id,
45418 reg_names[best_reg], reg_class_names[superclass]);
45422 regrename_finish ();
45429 INIT_REG_SET (&live);
/* Phase 2: backward liveness walk; patch insns regrename could not fix
   by moving the risky input into a freshly picked dead register.  */
45431 FOR_EACH_BB_FN (bb, cfun)
45435 COPY_REG_SET (&live, DF_LR_OUT (bb));
45436 df_simulate_initialize_backwards (bb, &live);
45438 FOR_BB_INSNS_REVERSE (bb, insn)
45440 if (!NONDEBUG_INSN_P (insn))
45443 df_simulate_one_insn_backwards (bb, insn, &live);
45445 if (GET_CODE (PATTERN (insn)) == USE
45446 || GET_CODE (PATTERN (insn)) == CLOBBER)
45449 extract_insn (insn);
45450 constrain_operands_cached (insn, reload_completed);
45452 int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
45453 recog_data.n_operands, &opno0,
45456 || !ix86_rop_should_change_byte_p (modrm)
45460 rtx oldreg = recog_data.operand[opno1];
45461 preprocess_constraints (insn);
45462 const operand_alternative *alt = which_op_alt ();
/* Bail out if OLDREG overlaps an earlyclobber output operand.  */
45465 for (i = 0; i < recog_data.n_operands; i++)
45467 && alt[i].earlyclobber
45468 && reg_overlap_mentioned_p (recog_data.operand[i],
45472 if (i < recog_data.n_operands)
45476 fprintf (dump_file,
45477 "attempting to fix modrm byte in insn %d:"
45478 " reg %d class %s", INSN_UID (insn), REGNO (oldreg),
45479 reg_class_names[alt[opno1].cl]);
/* Exclude live regs, OLDREG itself, call-saved and fixed regs, risky
   outputs, and anything outside the operand's constraint class.  */
45481 HARD_REG_SET unavailable;
45482 REG_SET_TO_HARD_REG_SET (unavailable, &live);
45483 SET_HARD_REG_BIT (unavailable, REGNO (oldreg));
45484 IOR_COMPL_HARD_REG_SET (unavailable, call_used_reg_set);
45485 IOR_HARD_REG_SET (unavailable, fixed_reg_set);
45486 IOR_HARD_REG_SET (unavailable, output_risky);
45487 IOR_COMPL_HARD_REG_SET (unavailable,
45488 reg_class_contents[alt[opno1].cl]);
45490 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
45491 if (!TEST_HARD_REG_BIT (unavailable, i))
45493 if (i == FIRST_PSEUDO_REGISTER)
45496 fprintf (dump_file, ", none available\n");
45500 fprintf (dump_file, " -> %d\n", i);
/* Substitute the new register and emit the copy feeding it.  */
45501 rtx newreg = gen_rtx_REG (recog_data.operand_mode[opno1], i);
45502 validate_change (insn, recog_data.operand_loc[opno1], newreg, false);
45503 insn = emit_insn_before (gen_move_insn (newreg, oldreg), insn);
45508 /* Implement machine specific optimizations. We implement padding of returns
45509 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* NOTE(review): the function header is elided from this excerpt; this is
   the body of the machine-dependent reorg hook dispatching the cleanup
   passes defined above.  */
45513 /* We are freeing block_for_insn in the toplev to keep compatibility
45514 with old MDEP_REORGS that are not CFG based. Recompute it now. */
45515 compute_bb_for_insn ();
45517 if (flag_mitigate_rop)
45518 ix86_mitigate_rop ();
45520 if (TARGET_SEH && current_function_has_exception_handlers ())
45521 ix86_seh_fixup_eh_fallthru ();
/* The speed-only passes: pad-short-function and pad-returns are mutually
   exclusive; the four-jump limit pass needs ASM_OUTPUT_MAX_SKIP_PAD.  */
45523 if (optimize && optimize_function_for_speed_p (cfun))
45525 if (TARGET_PAD_SHORT_FUNCTION)
45526 ix86_pad_short_function ();
45527 else if (TARGET_PAD_RETURNS)
45528 ix86_pad_returns ();
45529 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
45530 if (TARGET_FOUR_JUMP_LIMIT)
45531 ix86_avoid_jump_mispredicts ();
45536 /* Return nonzero when QImode register that must be represented via REX prefix
/* (comment continues; remainder elided in this excerpt).  Scans the
   operands of INSN for a general register outside the QI-accessible set.  */
45539 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
45542 extract_insn_cached (insn);
45543 for (i = 0; i < recog_data.n_operands; i++)
45544 if (GENERAL_REG_P (recog_data.operand[i])
45545 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
45550 /* Return true when INSN mentions register that must be encoded using REX
/* (comment continues; remainder elided).  Walks every sub-rtx of INSN's
   pattern looking for a REX integer or REX SSE hard register.  */
45553 x86_extended_reg_mentioned_p (rtx insn)
45555 subrtx_iterator::array_type array;
45556 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
45558 const_rtx x = *iter;
45560 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
45566 /* If profitable, negate (without causing overflow) integer constant
45567 of mode MODE at location LOC. Return true in this case. */
45569 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
45573 if (!CONST_INT_P (*loc))
45579 /* DImode x86_64 constants must fit in 32 bits. */
45580 gcc_assert (x86_64_immediate_operand (*loc, mode));
45591 gcc_unreachable ();
45594 /* Avoid overflows. */
/* Negating the sign-bit value would overflow; refuse.  */
45595 if (mode_signbit_p (mode, *loc))
45598 val = INTVAL (*loc);
45600 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
45601 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
45602 if ((val < 0 && val != -128)
45605 *loc = GEN_INT (-val);
45612 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
45613 optabs would emit if we didn't have TFmode patterns. */
45616 x86_emit_floatuns (rtx operands[2])
45618 rtx_code_label *neglab, *donelab;
45619 rtx i0, i1, f0, in, out;
45620 machine_mode mode, inmode;
45622 inmode = GET_MODE (operands[1]);
45623 gcc_assert (inmode == SImode || inmode == DImode);
45626 in = force_reg (inmode, operands[1]);
45627 mode = GET_MODE (out);
45628 neglab = gen_label_rtx ();
45629 donelab = gen_label_rtx ();
45630 f0 = gen_reg_rtx (mode);
/* Non-negative input: a plain signed conversion is already correct.  */
45632 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
45634 expand_float (out, in, 0);
45636 emit_jump_insn (gen_jump (donelab));
45639 emit_label (neglab);
/* Negative (high-bit-set) input: halve it while preserving the rounding
   bit (in >> 1) | (in & 1), convert signed, then double the result.  */
45641 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
45643 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
45645 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
45647 expand_float (f0, i0, 0);
45649 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
45651 emit_label (donelab);
45654 static bool canonicalize_perm (struct expand_vec_perm_d *d);
45655 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
45656 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
45657 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
45659 /* Get a vector mode of the same size as the original but with elements
45660 twice as wide. This is only guaranteed to apply to integral vectors. */
45662 static inline machine_mode
45663 get_mode_wider_vector (machine_mode o)
45665 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
45666 machine_mode n = GET_MODE_WIDER_MODE (o);
/* Sanity-check the genmodes ordering assumption: half the element count,
   same total size.  */
45667 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
45668 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
45672 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
45673 fill target with val via vec_duplicate. */
45676 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
45682 /* First attempt to recognize VAL as-is. */
45683 dup = gen_rtx_VEC_DUPLICATE (mode, val);
45684 insn = emit_insn (gen_rtx_SET (target, dup));
45685 if (recog_memoized (insn) < 0)
45688 /* If that fails, force VAL into a register. */
45691 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
/* The force_reg sequence was collected separately and is emitted before
   the already-emitted dup insn, then recognition is retried.  */
45692 seq = get_insns ();
45695 emit_insn_before (seq, insn);
45697 ok = recog_memoized (insn) >= 0;
45703 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
45704 with all elements equal to VAR. Return true if successful. */
/* NOTE(review): excerpt is subsampled -- the switch labels selecting per
   vector mode are elided; the comments below mark the visible strategies:
   direct vec_duplicate, permute-based broadcast, widen-and-recurse, and
   concat of two half-width broadcasts.  */
45707 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
45708 rtx target, rtx val)
45732 return ix86_vector_duplicate_value (mode, target, val);
45737 if (TARGET_SSE || TARGET_3DNOW_A)
/* HImode path: truncate the SImode value and duplicate it directly.  */
45741 val = gen_lowpart (SImode, val);
45742 x = gen_rtx_TRUNCATE (HImode, val);
45743 x = gen_rtx_VEC_DUPLICATE (mode, x);
45744 emit_insn (gen_rtx_SET (target, x));
45756 return ix86_vector_duplicate_value (mode, target, val);
/* Broadcast via a one-operand permutation of a vector whose low element
   holds VAL.  */
45760 struct expand_vec_perm_d dperm;
45764 memset (&dperm, 0, sizeof (dperm));
45765 dperm.target = target;
45766 dperm.vmode = mode;
45767 dperm.nelt = GET_MODE_NUNITS (mode);
45768 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
45769 dperm.one_operand_p = true;
45771 /* Extend to SImode using a paradoxical SUBREG. */
45772 tmp1 = gen_reg_rtx (SImode);
45773 emit_move_insn (tmp1, gen_lowpart (SImode, val));
45775 /* Insert the SImode value as low element of a V4SImode vector. */
45776 tmp2 = gen_reg_rtx (V4SImode);
45777 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
45778 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
45780 ok = (expand_vec_perm_1 (&dperm)
45781 || expand_vec_perm_broadcast_1 (&dperm));
45789 return ix86_vector_duplicate_value (mode, target, val);
45796 /* Replicate the value once into the next wider mode and recurse. */
45798 machine_mode smode, wsmode, wvmode;
45801 smode = GET_MODE_INNER (mode);
45802 wvmode = get_mode_wider_vector (mode);
45803 wsmode = GET_MODE_INNER (wvmode);
/* Build a double-width scalar holding VAL twice: (val << bits) | val.  */
45805 val = convert_modes (wsmode, smode, val, true);
45806 x = expand_simple_binop (wsmode, ASHIFT, val,
45807 GEN_INT (GET_MODE_BITSIZE (smode)),
45808 NULL_RTX, 1, OPTAB_LIB_WIDEN);
45809 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
45811 x = gen_reg_rtx (wvmode);
45812 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
45814 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
45821 return ix86_vector_duplicate_value (mode, target, val);
/* 256-bit QI/HI without direct support: broadcast into a 128-bit half
   and concatenate it with itself.  */
45824 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
45825 rtx x = gen_reg_rtx (hvmode);
45827 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
45830 x = gen_rtx_VEC_CONCAT (mode, x, x);
45831 emit_insn (gen_rtx_SET (target, x));
/* 512-bit QI/HI: direct duplicate with AVX512BW, else concat halves.  */
45837 if (TARGET_AVX512BW)
45838 return ix86_vector_duplicate_value (mode, target, val);
45841 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
45842 rtx x = gen_reg_rtx (hvmode);
45844 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
45847 x = gen_rtx_VEC_CONCAT (mode, x, x);
45848 emit_insn (gen_rtx_SET (target, x));
45857 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
45858 whose ONE_VAR element is VAR, and other elements are zero. Return true
/* (comment continues; remainder elided in this excerpt).  */
45862 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
45863 rtx target, rtx var, int one_var)
45865 machine_mode vsimode;
45868 bool use_vector_set = false;
/* Per-mode policy: decide whether a zeroed vector plus a single
   vec_set insertion is the best strategy (switch labels elided).  */
45873 /* For SSE4.1, we normally use vector set. But if the second
45874 element is zero and inter-unit moves are OK, we use movq
45876 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
45877 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
45883 use_vector_set = TARGET_SSE4_1;
45886 use_vector_set = TARGET_SSE2;
45889 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
45896 use_vector_set = TARGET_AVX;
45899 /* Use ix86_expand_vector_set in 64bit mode only. */
45900 use_vector_set = TARGET_AVX && TARGET_64BIT;
45906 if (use_vector_set)
/* Zero the target, then insert VAR at position ONE_VAR.  */
45908 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
45909 var = force_reg (GET_MODE_INNER (mode), var);
45910 ix86_expand_vector_set (mmx_ok, target, var, one_var);
/* Two-element case: VAR concatenated with a zero element.  */
45926 var = force_reg (GET_MODE_INNER (mode), var);
45927 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
45928 emit_insn (gen_rtx_SET (target, x));
/* Four-element case: duplicate VAR, merge against zero so only the low
   lane survives, then shuffle it into position ONE_VAR.  */
45933 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
45934 new_target = gen_reg_rtx (mode);
45936 new_target = target;
45937 var = force_reg (GET_MODE_INNER (mode), var);
45938 x = gen_rtx_VEC_DUPLICATE (mode, var);
45939 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
45940 emit_insn (gen_rtx_SET (new_target, x));
45943 /* We need to shuffle the value to the correct position, so
45944 create a new pseudo to store the intermediate result. */
45946 /* With SSE2, we can use the integer shuffle insns. */
45947 if (mode != V4SFmode && TARGET_SSE2)
45949 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
45951 GEN_INT (one_var == 1 ? 0 : 1),
45952 GEN_INT (one_var == 2 ? 0 : 1),
45953 GEN_INT (one_var == 3 ? 0 : 1)));
45954 if (target != new_target)
45955 emit_move_insn (target, new_target);
45959 /* Otherwise convert the intermediate result to V4SFmode and
45960 use the SSE1 shuffle instructions. */
45961 if (mode != V4SFmode)
45963 tmp = gen_reg_rtx (V4SFmode);
45964 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
45969 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
45971 GEN_INT (one_var == 1 ? 0 : 1),
45972 GEN_INT (one_var == 2 ? 0+4 : 1+4),
45973 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
45975 if (mode != V4SFmode)
45976 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
45977 else if (tmp != target)
45978 emit_move_insn (target, tmp);
45980 else if (target != new_target)
45981 emit_move_insn (target, new_target);
/* Narrow-element MMX cases: zero-extend to SImode and recurse on the
   corresponding SImode vector.  */
45986 vsimode = V4SImode;
45992 vsimode = V2SImode;
45998 /* Zero extend the variable element to SImode and recurse. */
45999 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
46001 x = gen_reg_rtx (vsimode);
46002 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
46004 gcc_unreachable ();
46006 emit_move_insn (target, gen_lowpart (mode, x));
46014 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
46015 consisting of the values in VALS. It is known that all elements
46016 except ONE_VAR are constants. Return true if successful. */
46019 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
46020 rtx target, rtx vals, int one_var)
46022 rtx var = XVECEXP (vals, 0, one_var);
46023 machine_mode wmode;
/* Build the constant vector with the variable slot zeroed; it can be
   loaded from the pool and then patched with VAR.  */
46026 const_vec = copy_rtx (vals);
46027 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
46028 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
46036 /* For the two element vectors, it's just as easy to use
46037 the general case. */
46041 /* Use ix86_expand_vector_set in 64bit mode only. */
46064 /* There's no way to set one QImode entry easily. Combine
46065 the variable value with its adjacent constant value, and
46066 promote to an HImode set. */
46067 x = XVECEXP (vals, 0, one_var ^ 1);
/* Even position: VAR occupies the high byte of the HImode pair.  */
46070 var = convert_modes (HImode, QImode, var, true);
46071 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
46072 NULL_RTX, 1, OPTAB_LIB_WIDEN);
46073 x = GEN_INT (INTVAL (x) & 0xff);
/* Odd position: the constant neighbor is shifted into the high byte.  */
46077 var = convert_modes (HImode, QImode, var, true);
46078 x = gen_int_mode (INTVAL (x) << 8, HImode);
46080 if (x != const0_rtx)
46081 var = expand_simple_binop (HImode, IOR, var, x, var,
46082 1, OPTAB_LIB_WIDEN);
46084 x = gen_reg_rtx (wmode);
46085 emit_move_insn (x, gen_lowpart (wmode, const_vec));
46086 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
46088 emit_move_insn (target, gen_lowpart (mode, x));
/* Default: load the pool constant, then overwrite slot ONE_VAR.  */
46095 emit_move_insn (target, const_vec);
46096 ix86_expand_vector_set (mmx_ok, target, var, one_var);
46100 /* A subroutine of ix86_expand_vector_init_general. Use vector
46101 concatenate to handle the most general case: all values variable,
46102 and none identical. */
/* NOTE(review): excerpt is subsampled -- the switch selecting CMODE /
   HMODE / GMODE per element count is elided.  The visible structure
   recursively halves N, concatenating pairs at each level.  */
46105 ix86_expand_vector_init_concat (machine_mode mode,
46106 rtx target, rtx *ops, int n)
46108 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
46109 rtx first[16], second[8], third[4];
46161 gcc_unreachable ();
/* n == 2: a single VEC_CONCAT of the two (forced-to-register) halves.  */
46164 if (!register_operand (ops[1], cmode))
46165 ops[1] = force_reg (cmode, ops[1]);
46166 if (!register_operand (ops[0], cmode))
46167 ops[0] = force_reg (cmode, ops[0]);
46168 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
46188 gcc_unreachable ();
46212 gcc_unreachable ();
46230 gcc_unreachable ();
46235 /* FIXME: We process inputs backward to help RA. PR 36222. */
46238 for (; i > 0; i -= 2, j--)
46240 first[j] = gen_reg_rtx (cmode);
46241 v = gen_rtvec (2, ops[i - 1], ops[i]);
46242 ix86_expand_vector_init (false, first[j],
46243 gen_rtx_PARALLEL (cmode, v));
/* n == 8: two further levels of pairwise concatenation via SECOND and
   THIRD before the final concat into TARGET.  */
46249 gcc_assert (hmode != VOIDmode);
46250 gcc_assert (gmode != VOIDmode);
46251 for (i = j = 0; i < n; i += 2, j++)
46253 second[j] = gen_reg_rtx (hmode);
46254 ix86_expand_vector_init_concat (hmode, second [j],
46258 for (i = j = 0; i < n; i += 2, j++)
46260 third[j] = gen_reg_rtx (gmode);
46261 ix86_expand_vector_init_concat (gmode, third[j],
46265 ix86_expand_vector_init_concat (mode, target, third, n);
/* n == 4: one intermediate level (SECOND) then the final concat.  */
46269 gcc_assert (hmode != VOIDmode);
46270 for (i = j = 0; i < n; i += 2, j++)
46272 second[j] = gen_reg_rtx (hmode);
46273 ix86_expand_vector_init_concat (hmode, second [j],
46277 ix86_expand_vector_init_concat (mode, target, second, n);
46280 ix86_expand_vector_init_concat (mode, target, first, n);
46284 gcc_unreachable ();
46288 /* A subroutine of ix86_expand_vector_init_general. Use vector
46289 interleave to handle the most general case: all values variable,
46290 and none identical. */
46293 ix86_expand_vector_init_interleave (machine_mode mode,
46294 rtx target, rtx *ops, int n)
46296 machine_mode first_imode, second_imode, third_imode, inner_mode;
46299 rtx (*gen_load_even) (rtx, rtx, rtx);
46300 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
46301 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
/* Select the per-mode insn generators (switch labels elided): V8HImode
   interleaves through V4SI then V2DI; V16QImode through V8HI, V4SI,
   V2DI.  */
46306 gen_load_even = gen_vec_setv8hi;
46307 gen_interleave_first_low = gen_vec_interleave_lowv4si;
46308 gen_interleave_second_low = gen_vec_interleave_lowv2di;
46309 inner_mode = HImode;
46310 first_imode = V4SImode;
46311 second_imode = V2DImode;
46312 third_imode = VOIDmode;
46315 gen_load_even = gen_vec_setv16qi;
46316 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
46317 gen_interleave_second_low = gen_vec_interleave_lowv4si;
46318 inner_mode = QImode;
46319 first_imode = V8HImode;
46320 second_imode = V4SImode;
46321 third_imode = V2DImode;
46324 gcc_unreachable ();
/* Stage 1: pack each pair ops[2i], ops[2i+1] into one vector.  */
46327 for (i = 0; i < n; i++)
46329 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
46330 op0 = gen_reg_rtx (SImode);
46331 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
46333 /* Insert the SImode value as low element of V4SImode vector. */
46334 op1 = gen_reg_rtx (V4SImode);
46335 op0 = gen_rtx_VEC_MERGE (V4SImode,
46336 gen_rtx_VEC_DUPLICATE (V4SImode,
46338 CONST0_RTX (V4SImode),
46340 emit_insn (gen_rtx_SET (op1, op0));
46342 /* Cast the V4SImode vector back to a vector in orignal mode. */
46343 op0 = gen_reg_rtx (mode);
46344 emit_move_insn (op0, gen_lowpart (mode, op1));
46346 /* Load even elements into the second position. */
46347 emit_insn (gen_load_even (op0,
46348 force_reg (inner_mode,
46352 /* Cast vector to FIRST_IMODE vector. */
46353 ops[i] = gen_reg_rtx (first_imode);
46354 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
46357 /* Interleave low FIRST_IMODE vectors. */
46358 for (i = j = 0; i < n; i += 2, j++)
46360 op0 = gen_reg_rtx (first_imode);
46361 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
46363 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
46364 ops[j] = gen_reg_rtx (second_imode);
46365 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
46368 /* Interleave low SECOND_IMODE vectors. */
46369 switch (second_imode)
46372 for (i = j = 0; i < n / 2; i += 2, j++)
46374 op0 = gen_reg_rtx (second_imode);
46375 emit_insn (gen_interleave_second_low (op0, ops[i],
46378 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
46380 ops[j] = gen_reg_rtx (third_imode);
46381 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
/* V4SImode drops through to the V2DImode final interleave.  */
46383 second_imode = V2DImode;
46384 gen_interleave_second_low = gen_vec_interleave_lowv2di;
46388 op0 = gen_reg_rtx (second_imode);
46389 emit_insn (gen_interleave_second_low (op0, ops[0],
46392 /* Cast the SECOND_IMODE vector back to a vector on original
46394 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0)));
46398 gcc_unreachable ();
46402 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
46403 all values variable, and none identical. */
/* NOTE(review): excerpt is subsampled -- the mode switch labels are
   elided.  Visible strategies: concat for wide-element modes, interleave
   for QI/HI vectors (128/256/512 bit), and a word-building fallback.  */
46406 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
46407 rtx target, rtx vals)
46409 rtx ops[64], op0, op1, op2, op3, op4, op5;
46410 machine_mode half_mode = VOIDmode;
46411 machine_mode quarter_mode = VOIDmode;
46418 if (!mmx_ok && !TARGET_SSE)
/* Concat path: gather all elements and recurse pairwise.  */
46434 n = GET_MODE_NUNITS (mode);
46435 for (i = 0; i < n; i++)
46436 ops[i] = XVECEXP (vals, 0, i);
46437 ix86_expand_vector_init_concat (mode, target, ops, n);
/* 256-bit QI/HI: build two 128-bit halves by interleave, then concat.  */
46441 half_mode = V16QImode;
46445 half_mode = V8HImode;
46449 n = GET_MODE_NUNITS (mode);
46450 for (i = 0; i < n; i++)
46451 ops[i] = XVECEXP (vals, 0, i);
46452 op0 = gen_reg_rtx (half_mode);
46453 op1 = gen_reg_rtx (half_mode);
46454 ix86_expand_vector_init_interleave (half_mode, op0, ops,
46456 ix86_expand_vector_init_interleave (half_mode, op1,
46457 &ops [n >> 1], n >> 2);
46458 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1)));
/* 512-bit QI/HI: four quarters by interleave, two concats to halves,
   one concat to the full vector.  */
46462 quarter_mode = V16QImode;
46463 half_mode = V32QImode;
46467 quarter_mode = V8HImode;
46468 half_mode = V16HImode;
46472 n = GET_MODE_NUNITS (mode);
46473 for (i = 0; i < n; i++)
46474 ops[i] = XVECEXP (vals, 0, i);
46475 op0 = gen_reg_rtx (quarter_mode);
46476 op1 = gen_reg_rtx (quarter_mode);
46477 op2 = gen_reg_rtx (quarter_mode);
46478 op3 = gen_reg_rtx (quarter_mode);
46479 op4 = gen_reg_rtx (half_mode);
46480 op5 = gen_reg_rtx (half_mode);
46481 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
46483 ix86_expand_vector_init_interleave (quarter_mode, op1,
46484 &ops [n >> 2], n >> 3);
46485 ix86_expand_vector_init_interleave (quarter_mode, op2,
46486 &ops [n >> 1], n >> 3);
46487 ix86_expand_vector_init_interleave (quarter_mode, op3,
46488 &ops [(n >> 1) | (n >> 2)], n >> 3);
46489 emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
46490 emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
46491 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5)));
46495 if (!TARGET_SSE4_1)
46503 /* Don't use ix86_expand_vector_init_interleave if we can't
46504 move from GPR to SSE register directly. */
46505 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
46508 n = GET_MODE_NUNITS (mode);
46509 for (i = 0; i < n; i++)
46510 ops[i] = XVECEXP (vals, 0, i);
46511 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
46519 gcc_unreachable ();
/* Fallback: assemble each machine word from its elements with
   shift-and-or, then move the words into the vector register.  */
46523 int i, j, n_elts, n_words, n_elt_per_word;
46524 machine_mode inner_mode;
46525 rtx words[4], shift;
46527 inner_mode = GET_MODE_INNER (mode);
46528 n_elts = GET_MODE_NUNITS (mode);
46529 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46530 n_elt_per_word = n_elts / n_words;
46531 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
46533 for (i = 0; i < n_words; ++i)
46535 rtx word = NULL_RTX;
/* Elements are folded in from the highest of the word downwards, so
   each ASHIFT makes room for the next-lower element.  */
46537 for (j = 0; j < n_elt_per_word; ++j)
46539 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
46540 elt = convert_modes (word_mode, inner_mode, elt, true);
46546 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
46547 word, 1, OPTAB_LIB_WIDEN);
46548 word = expand_simple_binop (word_mode, IOR, word, elt,
46549 word, 1, OPTAB_LIB_WIDEN);
46557 emit_move_insn (target, gen_lowpart (mode, words[0]));
46558 else if (n_words == 2)
/* The clobber tells the RA the full register is rewritten before the
   two half-register stores.  */
46560 rtx tmp = gen_reg_rtx (mode);
46561 emit_clobber (tmp);
46562 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
46563 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
46564 emit_move_insn (target, tmp);
46566 else if (n_words == 4)
46568 rtx tmp = gen_reg_rtx (V4SImode);
46569 gcc_assert (word_mode == SImode);
46570 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
46571 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
46572 emit_move_insn (target, gen_lowpart (mode, tmp));
46575 gcc_unreachable ();
46579 /* Initialize vector TARGET via VALS. Suppress the use of MMX
46580 instructions unless MMX_OK is true. */
46583 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
46585 machine_mode mode = GET_MODE (target);
46586 machine_mode inner_mode = GET_MODE_INNER (mode);
46587 int n_elts = GET_MODE_NUNITS (mode);
46588 int n_var = 0, one_var = -1;
46589 bool all_same = true, all_const_zero = true;
/* Classify the elements: count non-constants (remembering the last one),
   and detect the all-zero and all-identical cases.  */
46593 for (i = 0; i < n_elts; ++i)
46595 x = XVECEXP (vals, 0, i);
46596 if (!(CONST_SCALAR_INT_P (x)
46597 || CONST_DOUBLE_P (x)
46598 || CONST_FIXED_P (x)))
46599 n_var++, one_var = i;
46600 else if (x != CONST0_RTX (inner_mode))
46601 all_const_zero = false;
46602 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
46606 /* Constants are best loaded from the constant pool. */
46609 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
46613 /* If all values are identical, broadcast the value. */
46615 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
46616 XVECEXP (vals, 0, 0)))
46619 /* Values where only one field is non-constant are best loaded from
46620 the pool and overwritten via move later. */
46624 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
46625 XVECEXP (vals, 0, one_var),
46629 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* All special strategies failed: fall back to the general expander.  */
46633 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector TARGET, choosing an
   instruction sequence appropriate for the vector mode and ISA level.
   NOTE(review): elided listing -- the mode `switch` labels and several
   braces are missing between the numbered lines below.  */
46637 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
46639 machine_mode mode = GET_MODE (target);
46640 machine_mode inner_mode = GET_MODE_INNER (mode);
46641 machine_mode half_mode;
46642 bool use_vec_merge = false;
/* Tables of lo/hi 128-bit half extract and insert expanders, indexed
   [mode-index][half].  Used by the 256-bit path further down.  */
46644 static rtx (*gen_extract[6][2]) (rtx, rtx)
46646 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
46647 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
46648 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
46649 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
46650 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
46651 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
46653 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
46655 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
46656 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
46657 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
46658 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
46659 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
46660 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
/* mmode != VOIDmode selects the AVX-512 mask-blend (vblendm) path at
   the end of the function.  */
46663 machine_mode mmode = VOIDmode;
46664 rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
/* Two-element vector: extract the *other* element and rebuild the
   vector with VEC_CONCAT in the right order.  */
46672 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
46673 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
46675 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
46677 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
46678 emit_insn (gen_rtx_SET (target, tmp));
46684 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
46688 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
46689 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
46691 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
46693 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
46694 emit_insn (gen_rtx_SET (target, tmp));
46701 /* For the two element vectors, we implement a VEC_CONCAT with
46702 the extraction of the other element. */
46704 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
46705 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp)
46708 op0 = val, op1 = tmp;
46710 op0 = tmp, op1 = val;
46712 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
46713 emit_insn (gen_rtx_SET (target, tmp));
46718 use_vec_merge = TARGET_SSE4_1;
46725 use_vec_merge = true;
/* V4SF without SSE4.1: shuffle-based insertion; which shufps immediates
   are used depends on ELT (elided case labels).  */
46729 /* tmp = target = A B C D */
46730 tmp = copy_to_reg (target);
46731 /* target = A A B B */
46732 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
46733 /* target = X A B B */
46734 ix86_expand_vector_set (false, target, val, 0);
46735 /* target = A X C D */
46736 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
46737 const1_rtx, const0_rtx,
46738 GEN_INT (2+4), GEN_INT (3+4)));
46742 /* tmp = target = A B C D */
46743 tmp = copy_to_reg (target);
46744 /* tmp = X B C D */
46745 ix86_expand_vector_set (false, tmp, val, 0);
46746 /* target = A B X D */
46747 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
46748 const0_rtx, const1_rtx,
46749 GEN_INT (0+4), GEN_INT (3+4)));
46753 /* tmp = target = A B C D */
46754 tmp = copy_to_reg (target);
46755 /* tmp = X B C D */
46756 ix86_expand_vector_set (false, tmp, val, 0);
46757 /* target = A B X D */
46758 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
46759 const0_rtx, const1_rtx,
46760 GEN_INT (2+4), GEN_INT (0+4)));
46764 gcc_unreachable ();
46769 use_vec_merge = TARGET_SSE4_1;
46773 /* Element 0 handled by vec_merge below. */
46776 use_vec_merge = true;
46782 /* With SSE2, use integer shuffles to swap element 0 and ELT,
46783 store into element 0, then shuffle them back. */
46787 order[0] = GEN_INT (elt);
46788 order[1] = const1_rtx;
46789 order[2] = const2_rtx;
46790 order[3] = GEN_INT (3);
46791 order[elt] = const0_rtx;
46793 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
46794 order[1], order[2], order[3]));
46796 ix86_expand_vector_set (false, target, val, 0);
46798 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
46799 order[1], order[2], order[3]));
46803 /* For SSE1, we have to reuse the V4SF code. */
46804 rtx t = gen_reg_rtx (V4SFmode);
46805 emit_move_insn (t, gen_lowpart (V4SFmode, target));
46806 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
46807 emit_move_insn (target, gen_lowpart (mode, t));
46812 use_vec_merge = TARGET_SSE2;
46815 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
46819 use_vec_merge = TARGET_SSE4_1;
/* 256-bit modes: pick the 128-bit half mode, then extract the half
   containing ELT, set into it recursively, and re-insert it.  */
46826 half_mode = V16QImode;
46832 half_mode = V8HImode;
46838 half_mode = V4SImode;
46844 half_mode = V2DImode;
46850 half_mode = V4SFmode;
46856 half_mode = V2DFmode;
46862 /* Compute offset. */
46866 gcc_assert (i <= 1);
46868 /* Extract the half. */
46869 tmp = gen_reg_rtx (half_mode);
46870 emit_insn (gen_extract[j][i] (tmp, target));
46872 /* Put val in tmp at elt. */
46873 ix86_expand_vector_set (false, tmp, val, elt);
46876 emit_insn (gen_insert[j][i] (target, target, tmp));
/* 512-bit modes: select the AVX-512 mask-blend expander; mmode is
   presumably set to the mask mode in the elided lines -- confirm
   against the full source.  */
46880 if (TARGET_AVX512F)
46883 gen_blendm = gen_avx512f_blendmv8df;
46888 if (TARGET_AVX512F)
46891 gen_blendm = gen_avx512f_blendmv8di;
46896 if (TARGET_AVX512F)
46899 gen_blendm = gen_avx512f_blendmv16sf;
46904 if (TARGET_AVX512F)
46907 gen_blendm = gen_avx512f_blendmv16si;
46912 if (TARGET_AVX512F && TARGET_AVX512BW)
46915 gen_blendm = gen_avx512bw_blendmv32hi;
46920 if (TARGET_AVX512F && TARGET_AVX512BW)
46923 gen_blendm = gen_avx512bw_blendmv64qi;
/* AVX-512 path: broadcast VAL and blend it into TARGET under a
   one-bit mask selecting element ELT.  */
46931 if (mmode != VOIDmode)
46933 tmp = gen_reg_rtx (mode);
46934 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
46935 emit_insn (gen_blendm (target, tmp, target,
46937 gen_int_mode (1 << elt, mmode))));
/* SSE4.1-style path: VEC_MERGE with an immediate element mask.  */
46939 else if (use_vec_merge)
46941 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
46942 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
46943 emit_insn (gen_rtx_SET (target, tmp));
/* Last resort: spill the vector to a stack slot, store the element
   through memory, and reload the whole vector.  */
46947 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
46949 emit_move_insn (mem, target);
46951 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
46952 emit_move_insn (tmp, val);
46954 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET.  MMX_OK
   permits MMX instructions.  NOTE(review): elided listing -- the mode
   `switch` labels and several braces are missing between lines.  */
46959 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
46961 machine_mode mode = GET_MODE (vec);
46962 machine_mode inner_mode = GET_MODE_INNER (mode);
46963 bool use_vec_extr = false;
46976 use_vec_extr = true;
46980 use_vec_extr = TARGET_SSE4_1;
/* V4SF without SSE4.1: shuffle the wanted element into position 0,
   then fall through to the generic VEC_SELECT emission below.  */
46992 tmp = gen_reg_rtx (mode);
46993 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
46994 GEN_INT (elt), GEN_INT (elt),
46995 GEN_INT (elt+4), GEN_INT (elt+4)));
46999 tmp = gen_reg_rtx (mode);
47000 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
47004 gcc_unreachable ();
47007 use_vec_extr = true;
47012 use_vec_extr = TARGET_SSE4_1;
/* V4SI with SSE2: pshufd the wanted element into lane 0.  */
47026 tmp = gen_reg_rtx (mode);
47027 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
47028 GEN_INT (elt), GEN_INT (elt),
47029 GEN_INT (elt), GEN_INT (elt)));
47033 tmp = gen_reg_rtx (mode);
47034 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
47038 gcc_unreachable ();
47041 use_vec_extr = true;
47046 /* For SSE1, we have to reuse the V4SF code. */
47047 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
47048 gen_lowpart (V4SFmode, vec), elt);
47054 use_vec_extr = TARGET_SSE2;
47057 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
47061 use_vec_extr = TARGET_SSE4_1;
/* 256/512-bit modes: extract the 128/256-bit half that contains ELT
   (lo or hi depending on the elided condition), then recurse with the
   element index masked to the half.  */
47067 tmp = gen_reg_rtx (V4SFmode);
47069 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
47071 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
47072 ix86_expand_vector_extract (false, target, tmp, elt & 3);
47080 tmp = gen_reg_rtx (V2DFmode);
47082 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
47084 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
47085 ix86_expand_vector_extract (false, target, tmp, elt & 1);
47093 tmp = gen_reg_rtx (V16QImode);
47095 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
47097 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
47098 ix86_expand_vector_extract (false, target, tmp, elt & 15);
47106 tmp = gen_reg_rtx (V8HImode);
47108 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
47110 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
47111 ix86_expand_vector_extract (false, target, tmp, elt & 7);
47119 tmp = gen_reg_rtx (V4SImode);
47121 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
47123 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
47124 ix86_expand_vector_extract (false, target, tmp, elt & 3);
47132 tmp = gen_reg_rtx (V2DImode);
47134 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
47136 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
47137 ix86_expand_vector_extract (false, target, tmp, elt & 1);
47143 if (TARGET_AVX512BW)
47145 tmp = gen_reg_rtx (V16HImode);
47147 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
47149 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
47150 ix86_expand_vector_extract (false, target, tmp, elt & 15);
47156 if (TARGET_AVX512BW)
47158 tmp = gen_reg_rtx (V32QImode);
47160 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
47162 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
47163 ix86_expand_vector_extract (false, target, tmp, elt & 31);
47169 tmp = gen_reg_rtx (V8SFmode);
47171 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
47173 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
47174 ix86_expand_vector_extract (false, target, tmp, elt & 7);
47178 tmp = gen_reg_rtx (V4DFmode);
47180 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
47182 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
47183 ix86_expand_vector_extract (false, target, tmp, elt & 3);
47187 tmp = gen_reg_rtx (V8SImode);
47189 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
47191 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
47192 ix86_expand_vector_extract (false, target, tmp, elt & 7);
47196 tmp = gen_reg_rtx (V4DImode);
47198 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
47200 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
47201 ix86_expand_vector_extract (false, target, tmp, elt & 3);
47205 /* ??? Could extract the appropriate HImode element and shift. */
/* Generic path: emit a VEC_SELECT, widening QI/HI results to SImode
   so the optimizers see the implicit zero extension.  */
47212 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
47213 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
47215 /* Let the rtl optimizers know about the zero extension performed. */
47216 if (inner_mode == QImode || inner_mode == HImode)
47218 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
47219 target = gen_lowpart (SImode, target);
47222 emit_insn (gen_rtx_SET (target, tmp));
/* Last resort: go through a stack temporary.  */
47226 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
47228 emit_move_insn (mem, vec);
47230 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
47231 emit_move_insn (target, tmp);
47235 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
47236 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
47237 The upper bits of DEST are undefined, though they shouldn't cause
47238 exceptions (some bits from src or all zeros are ok). */
/* NOTE(review): elided listing -- the switch's `case` labels and some
   braces are missing between the numbered lines below.  */
47241 emit_reduc_half (rtx dest, rtx src, int i)
47244 switch (GET_MODE (src))
/* 128-bit modes: movhlps/shufps/unpckhpd or a V1TI logical shift,
   depending on mode and I.  */
47248 tem = gen_sse_movhlps (dest, src, src);
47250 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
47251 GEN_INT (1 + 4), GEN_INT (1 + 4));
47254 tem = gen_vec_interleave_highv2df (dest, src, src);
47260 d = gen_reg_rtx (V1TImode);
47261 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
/* 256-bit float modes: vperm2f128 for the cross-lane half, shufps /
   shufpd for the in-lane half.  */
47266 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
47268 tem = gen_avx_shufps256 (dest, src, src,
47269 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
47273 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
47275 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
/* 256-bit integer modes: vpermq / V2TI shift through a V4DI view.  */
47283 if (GET_MODE (dest) != V4DImode)
47284 d = gen_reg_rtx (V4DImode);
47285 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
47286 gen_lowpart (V4DImode, src),
47291 d = gen_reg_rtx (V2TImode);
47292 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
/* 512-bit modes: shuf_i32x4 for 256/512-bit halves, pshufd for the
   smaller shifts, all through a V16SI view.  */
47303 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
47304 gen_lowpart (V16SImode, src),
47305 gen_lowpart (V16SImode, src),
47306 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
47307 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
47308 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
47309 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
47310 GEN_INT (0xC), GEN_INT (0xD),
47311 GEN_INT (0xE), GEN_INT (0xF),
47312 GEN_INT (0x10), GEN_INT (0x11),
47313 GEN_INT (0x12), GEN_INT (0x13),
47314 GEN_INT (0x14), GEN_INT (0x15),
47315 GEN_INT (0x16), GEN_INT (0x17));
47317 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
47318 gen_lowpart (V16SImode, src),
47319 GEN_INT (i == 128 ? 0x2 : 0x1),
47323 GEN_INT (i == 128 ? 0x6 : 0x5),
47327 GEN_INT (i == 128 ? 0xA : 0x9),
47331 GEN_INT (i == 128 ? 0xE : 0xD),
47337 gcc_unreachable ();
/* When the shuffle was built in a scratch D, copy the low part back
   into DEST.  */
47341 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
47344 /* Expand a vector reduction. FN is the binary pattern to reduce;
47345 DEST is the destination; IN is the input vector. */
47348 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
47350 rtx half, dst, vec = in;
47351 machine_mode mode = GET_MODE (in);
/* Fast path: SSE4.1 phminposuw computes the V8HI unsigned-min
   reduction in one instruction.  */
47354 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
47356 && mode == V8HImode
47357 && fn == gen_uminv8hi3
47359 emit_insn (gen_sse4_1_phminposuw (dest, in));
/* General case: repeatedly halve the vector with emit_reduc_half and
   combine the halves with FN until one element remains.  On the last
   iteration (elided here) DST is presumably DEST itself.  */
47363 for (i = GET_MODE_BITSIZE (mode);
47364 i > GET_MODE_UNIT_BITSIZE (mode);
47367 half = gen_reg_rtx (mode);
47368 emit_reduc_half (half, vec, i);
47369 if (i == GET_MODE_UNIT_BITSIZE (mode) * 2)
47372 dst = gen_reg_rtx (mode);
47373 emit_insn (fn (dst, half, vec));
47378 /* Target hook for scalar_mode_supported_p. */
/* Decimal float modes defer to the generic default; TFmode has its own
   (elided) answer; everything else uses the default hook.  */
47380 ix86_scalar_mode_supported_p (machine_mode mode)
47382 if (DECIMAL_FLOAT_MODE_P (mode))
47383 return default_decimal_float_supported_p ();
47384 else if (mode == TFmode)
47387 return default_scalar_mode_supported_p (mode);
47390 /* Implements target hook vector_mode_supported_p. */
/* A vector mode is supported when the matching ISA extension is
   enabled; checks run from SSE up to AVX-512, then MMX/3DNow.  */
47392 ix86_vector_mode_supported_p (machine_mode mode)
47394 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
47396 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
47398 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
47400 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
47402 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
47404 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
47409 /* Implement target hook libgcc_floating_mode_supported_p. */
/* NOTE(review): heavily elided -- only the TFmode-related #ifdef
   skeleton is visible here.  */
47411 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
47421 #ifdef IX86_NO_LIBGCC_TFMODE
47423 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
47424 return TARGET_LONG_DOUBLE_128;
47434 /* Target hook for c_mode_for_suffix. */
/* NOTE(review): body elided in this listing; only the signature is
   visible.  */
47435 static machine_mode
47436 ix86_c_mode_for_suffix (char suffix)
47446 /* Worker function for TARGET_MD_ASM_ADJUST.
47448 We implement asm flag outputs, and maintain source compatibility
47449 with the old cc0-based compiler. */
/* NOTE(review): elided listing -- some braces/continues and the `con`
   dispatch switch labels are missing between the lines below.  */
47452 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
47453 vec<const char *> &constraints,
47454 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
/* Always clobber the x87 status word for compatibility.  */
47456 clobbers.safe_push (gen_rtx_REG (CCFPmode, FPSR_REG));
47457 SET_HARD_REG_BIT (clobbered_regs, FPSR_REG);
47459 bool saw_asm_flag = false;
/* Scan outputs for "=@cc<cond>" flag-output constraints.  */
47462 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
47464 const char *con = constraints[i];
47465 if (strncmp (con, "=@cc", 4) != 0)
47468 if (strchr (con, ',') != NULL)
47470 error ("alternatives not allowed in asm flag output");
/* A leading 'n' inverts the condition.  */
47474 bool invert = false;
47476 invert = true, con++;
/* Map the condition-suffix letters onto a CC mode + rtx comparison
   code (case labels elided).  */
47478 machine_mode mode = CCmode;
47479 rtx_code code = UNKNOWN;
47485 mode = CCAmode, code = EQ;
47486 else if (con[1] == 'e' && con[2] == 0)
47487 mode = CCCmode, code = NE;
47491 mode = CCCmode, code = EQ;
47492 else if (con[1] == 'e' && con[2] == 0)
47493 mode = CCAmode, code = NE;
47497 mode = CCCmode, code = EQ;
47501 mode = CCZmode, code = EQ;
47505 mode = CCGCmode, code = GT;
47506 else if (con[1] == 'e' && con[2] == 0)
47507 mode = CCGCmode, code = GE;
47511 mode = CCGCmode, code = LT;
47512 else if (con[1] == 'e' && con[2] == 0)
47513 mode = CCGCmode, code = LE;
47517 mode = CCOmode, code = EQ;
47521 mode = CCPmode, code = EQ;
47525 mode = CCSmode, code = EQ;
47529 mode = CCZmode, code = EQ;
47532 if (code == UNKNOWN)
47534 error ("unknown asm flag output %qs", constraints[i]);
47538 code = reverse_condition (code);
47540 rtx dest = outputs[i];
47543 /* This is the first asm flag output. Here we put the flags
47544 register in as the real output and adjust the condition to
47546 constraints[i] = "=Bf";
47547 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
47548 saw_asm_flag = true;
47552 /* We don't need the flags register as output twice. */
47553 constraints[i] = "=X";
47554 outputs[i] = gen_rtx_SCRATCH (SImode);
/* Materialize the condition into DEST: setcc into a QImode temp, then
   zero-extend to the destination mode if needed.  */
47557 rtx x = gen_rtx_REG (mode, FLAGS_REG);
47558 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
47560 machine_mode dest_mode = GET_MODE (dest);
47561 if (!SCALAR_INT_MODE_P (dest_mode))
47563 error ("invalid type for asm flag output");
47567 if (dest_mode == DImode && !TARGET_64BIT)
47568 dest_mode = SImode;
47570 if (dest_mode != QImode)
47572 rtx destqi = gen_reg_rtx (QImode);
47573 emit_insn (gen_rtx_SET (destqi, x));
/* On targets that prefer AND-style zero extension, build the value by
   clearing DEST_MODE first and strict-inserting the QI result.  */
47575 if (TARGET_ZERO_EXTEND_WITH_AND
47576 && optimize_function_for_speed_p (cfun))
47578 x = force_reg (dest_mode, const0_rtx);
47580 emit_insn (gen_movstrictqi
47581 (gen_lowpart (QImode, x), destqi));
47584 x = gen_rtx_ZERO_EXTEND (dest_mode, destqi);
47587 if (dest_mode != GET_MODE (dest))
47589 rtx tmp = gen_reg_rtx (SImode);
47591 emit_insn (gen_rtx_SET (tmp, x));
47592 emit_insn (gen_zero_extendsidi2 (dest, tmp));
47595 emit_insn (gen_rtx_SET (dest, x));
47597 rtx_insn *seq = get_insns ();
47604 /* If we had no asm flag outputs, clobber the flags. */
47605 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
47606 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
47611 /* Implements target vector targetm.asm.encode_section_info. */
/* Runs the default hook, then flags symbols living in the large data
   section so addressing code knows they need far addresses.  */
47613 static void ATTRIBUTE_UNUSED
47614 ix86_encode_section_info (tree decl, rtx rtl, int first)
47616 default_encode_section_info (decl, rtl, first);
47618 if (ix86_in_large_data_p (decl))
47619 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
47622 /* Worker function for REVERSE_CONDITION. */
/* FP compare modes need the unordered-aware reversal; everything else
   uses the plain one.  */
47625 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
47627 return (mode != CCFPmode && mode != CCFPUmode
47628 ? reverse_condition (code)
47629 : reverse_condition_maybe_unordered (code));
47632 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* Returns the assembler template; popping forms (fstp/ffreep) are used
   when the source register dies in this insn.  */
47636 output_387_reg_move (rtx insn, rtx *operands)
47638 if (REG_P (operands[0]))
47640 if (REG_P (operands[1])
47641 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
47643 if (REGNO (operands[0]) == FIRST_STACK_REG)
47644 return output_387_ffreep (operands, 0);
47645 return "fstp\t%y0";
47647 if (STACK_TOP_P (operands[0]))
47648 return "fld%Z1\t%y1";
47651 else if (MEM_P (operands[0]))
47653 gcc_assert (REG_P (operands[1]));
47654 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
47655 return "fstp%Z0\t%y0";
47658 /* There is no non-popping store to memory for XFmode.
47659 So if we need one, follow the store with a load. */
47660 if (GET_MODE (operands[0]) == XFmode)
47661 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
47663 return "fst%Z0\t%y0";
47670 /* Output code to perform a conditional jump to LABEL, if C2 flag in
47671 FP status register is set. */
47674 ix86_emit_fp_unordered_jump (rtx label)
47676 rtx reg = gen_reg_rtx (HImode);
/* Read the x87 status word into REG.  */
47679 emit_insn (gen_x86_fnstsw_1 (reg));
/* With SAHF available, load the status into EFLAGS and branch on the
   unordered condition ...  */
47681 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
47683 emit_insn (gen_x86_sahf_1 (reg));
47685 temp = gen_rtx_REG (CCmode, FLAGS_REG);
47686 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
/* ... otherwise test bit C2 (0x04 in the high byte) directly.  */
47690 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
47692 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
47693 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
47696 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
47697 gen_rtx_LABEL_REF (VOIDmode, label),
47699 temp = gen_rtx_SET (pc_rtx, temp);
47701 emit_jump_insn (temp);
/* The unordered case is expected to be rare.  */
47702 predict_jump (REG_BR_PROB_BASE * 10 / 100);
47705 /* Output code to perform a log1p XFmode calculation. */
/* Uses fyl2xp1 for |op1| below ~(sqrt(2)/2 - 1) where it is accurate,
   and fyl2x on (1 + op1) otherwise; both scaled by ln(2) via fldln2. */
47707 void ix86_emit_i387_log1p (rtx op0, rtx op1)
47709 rtx_code_label *label1 = gen_label_rtx ();
47710 rtx_code_label *label2 = gen_label_rtx ();
47712 rtx tmp = gen_reg_rtx (XFmode);
47713 rtx tmp2 = gen_reg_rtx (XFmode);
47716 emit_insn (gen_absxf2 (tmp, op1));
47717 test = gen_rtx_GE (VOIDmode, tmp,
47718 const_double_from_real_value (
47719 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
47721 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
47723 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
47724 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
47725 emit_jump (label2);
47727 emit_label (label1);
47728 emit_move_insn (tmp, CONST1_RTX (XFmode));
47729 emit_insn (gen_addxf3 (tmp, op1, tmp));
47730 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
47731 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
47733 emit_label (label2);
47736 /* Emit code for round calculation. */
/* Expands round(op1) into op0 as sgn(a) * floor(|a| + 0.5) using x87
   fxam for the sign and frndint/lfloor for the floor.
   NOTE(review): elided listing -- switch case labels are missing.  */
47737 void ix86_emit_i387_round (rtx op0, rtx op1)
47739 machine_mode inmode = GET_MODE (op1);
47740 machine_mode outmode = GET_MODE (op0);
47741 rtx e1, e2, res, tmp, tmp1, half;
47742 rtx scratch = gen_reg_rtx (HImode);
47743 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
47744 rtx_code_label *jump_label = gen_label_rtx ();
47746 rtx (*gen_abs) (rtx, rtx);
47747 rtx (*gen_neg) (rtx, rtx);
/* Select abs expander by input FP mode (SF/DF/XF).  */
47752 gen_abs = gen_abssf2;
47755 gen_abs = gen_absdf2;
47758 gen_abs = gen_absxf2;
47761 gcc_unreachable ();
/* Select neg expander by output mode (FP modes or HI/SI/DI).  */
47767 gen_neg = gen_negsf2;
47770 gen_neg = gen_negdf2;
47773 gen_neg = gen_negxf2;
47776 gen_neg = gen_neghi2;
47779 gen_neg = gen_negsi2;
47782 gen_neg = gen_negdi2;
47785 gcc_unreachable ();
47788 e1 = gen_reg_rtx (inmode);
47789 e2 = gen_reg_rtx (inmode);
47790 res = gen_reg_rtx (outmode);
47792 half = const_double_from_real_value (dconsthalf, inmode);
47794 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
47796 /* scratch = fxam(op1) */
47797 emit_insn (gen_rtx_SET (scratch,
47798 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
47800 /* e1 = fabs(op1) */
47801 emit_insn (gen_abs (e1, op1));
47803 /* e2 = e1 + 0.5 */
47804 half = force_reg (inmode, half);
47805 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (inmode, e1, half)));
47807 /* res = floor(e2) */
/* Widen to XFmode first when the input is narrower.  */
47808 if (inmode != XFmode)
47810 tmp1 = gen_reg_rtx (XFmode);
47812 emit_insn (gen_rtx_SET (tmp1, gen_rtx_FLOAT_EXTEND (XFmode, e2)));
/* Floor in XFmode, then truncate back to the FP output mode via a
   no-op-truncation unspec ...  */
47822 rtx tmp0 = gen_reg_rtx (XFmode);
47824 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
47826 emit_insn (gen_rtx_SET (res,
47827 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
47828 UNSPEC_TRUNC_NOOP)));
47832 emit_insn (gen_frndintxf2_floor (res, tmp1));
/* ... or floor straight into an integer output mode.  */
47835 emit_insn (gen_lfloorxfhi2 (res, tmp1));
47838 emit_insn (gen_lfloorxfsi2 (res, tmp1));
47841 emit_insn (gen_lfloorxfdi2 (res, tmp1));
47844 gcc_unreachable ();
47847 /* flags = signbit(a) */
47848 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
47850 /* if (flags) then res = -res */
47851 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
47852 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
47853 gen_rtx_LABEL_REF (VOIDmode, jump_label),
47855 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
47856 predict_jump (REG_BR_PROB_BASE * 50 / 100);
47857 JUMP_LABEL (insn) = jump_label;
47859 emit_insn (gen_neg (res, res));
47861 emit_label (jump_label);
47862 LABEL_NUSES (jump_label) = 1;
47864 emit_move_insn (op0, res);
47867 /* Output code to perform a Newton-Rhapson approximation of a single precision
47868 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
/* One Newton-Raphson refinement of the hardware rcp estimate:
   res = a * (2*x0 - b*x0*x0), where x0 = rcp(b).  */
47870 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
47872 rtx x0, x1, e0, e1;
47874 x0 = gen_reg_rtx (mode);
47875 e0 = gen_reg_rtx (mode);
47876 e1 = gen_reg_rtx (mode);
47877 x1 = gen_reg_rtx (mode);
47879 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
47881 b = force_reg (mode, b);
47883 /* x0 = rcp(b) estimate */
/* 512-bit modes use the rcp14 unspec (different unspec code elided);
   smaller modes use the plain rcp unspec.  */
47884 if (mode == V16SFmode || mode == V8DFmode)
47885 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
47888 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
47892 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
47895 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
47898 emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
47901 emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
47904 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
47907 /* Output code to perform a Newton-Rhapson approximation of a
47908 single precision floating point [reciprocal] square root. */
/* RECIP selects rsqrt(a) vs sqrt(a); both use one Newton-Raphson step
   on the hardware rsqrt estimate.  */
47910 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
47912 rtx x0, e0, e1, e2, e3, mthree, mhalf;
47916 x0 = gen_reg_rtx (mode);
47917 e0 = gen_reg_rtx (mode);
47918 e1 = gen_reg_rtx (mode);
47919 e2 = gen_reg_rtx (mode);
47920 e3 = gen_reg_rtx (mode);
/* Constants -3.0 and -0.5 used by the refinement formula.  */
47922 real_from_integer (&r, VOIDmode, -3, SIGNED);
47923 mthree = const_double_from_real_value (r, SFmode);
47925 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
47926 mhalf = const_double_from_real_value (r, SFmode);
47927 unspec = UNSPEC_RSQRT;
47929 if (VECTOR_MODE_P (mode))
47931 mthree = ix86_build_const_vector (mode, true, mthree);
47932 mhalf = ix86_build_const_vector (mode, true, mhalf);
47933 /* There is no 512-bit rsqrt. There is however rsqrt14. */
47934 if (GET_MODE_SIZE (mode) == 64)
47935 unspec = UNSPEC_RSQRT14;
47938 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
47939 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
47941 a = force_reg (mode, a);
47943 /* x0 = rsqrt(a) estimate */
47944 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
47947 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
47950 rtx zero = force_reg (mode, CONST0_RTX(mode));
47953 /* Handle masked compare. */
47954 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
47956 mask = gen_reg_rtx (HImode);
47957 /* Imm value 0x4 corresponds to not-equal comparison. */
47958 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
47959 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
/* Non-AVX512: build an all-ones/zero NE mask and AND it into x0 so
   x0 becomes 0 where a == 0.  */
47963 mask = gen_reg_rtx (mode);
47964 emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a)));
47965 emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask)));
47970 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
47972 emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
47975 mthree = force_reg (mode, mthree);
47976 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
47978 mhalf = force_reg (mode, mhalf);
47980 /* e3 = -.5 * x0 */
47981 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf)));
47983 /* e3 = -.5 * e0 */
47984 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf)));
47985 /* ret = e2 * e3 */
47986 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3)));
47989 #ifdef TARGET_SOLARIS
47990 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
47993 i386_solaris_elf_named_section (const char *name, unsigned int flags,
47996 /* With Binutils 2.15, the "@unwind" marker must be specified on
47997 every occurrence of the ".eh_frame" section, not just the first
48000 && strcmp (name, ".eh_frame") == 0)
48002 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
48003 flags & SECTION_WRITE ? "aw" : "a");
/* COMDAT sections go through the Solaris comdat emitter; everything
   else falls back to the generic ELF implementation.  */
48008 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
48010 solaris_elf_asm_comdat_section (name, flags, decl);
48015 default_elf_asm_named_section (name, flags, decl);
48017 #endif /* TARGET_SOLARIS */
48019 /* Return the mangling of TYPE if it is an extended fundamental type. */
/* Only void/bool/integer/real main variants are considered; the switch
   on TYPE_MODE (return values elided) maps __float128 to "g" and
   long double / __float80 to "e" per the Itanium C++ ABI.  */
48021 static const char *
48022 ix86_mangle_type (const_tree type)
48024 type = TYPE_MAIN_VARIANT (type);
48026 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
48027 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
48030 switch (TYPE_MODE (type))
48033 /* __float128 is "g". */
48036 /* "long double" or __float80 is "e". */
48043 /* For 32-bit code we can save PIC register setup by using
48044 __stack_chk_fail_local hidden function instead of calling
48045 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
48046 register, so it is better to call __stack_chk_fail directly. */
48048 static tree ATTRIBUTE_UNUSED
48049 ix86_stack_protect_fail (void)
48051 return TARGET_64BIT
48052 ? default_external_stack_protect_fail ()
48053 : default_hidden_stack_protect_fail ();
48056 /* Select a format to encode pointers in exception handling data. CODE
48057 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
48058 true if the symbol may be affected by dynamic relocations.
48060 ??? All x86 object file formats are capable of representing this.
48061 After all, the relocation needed is the same as for the call insn.
48062 Whether or not a particular assembler allows us to enter such, I
48063 guess we'll have to see. */
48065 asm_preferred_eh_data_format (int code, int global)
/* PIC (guard condition elided): pc-relative sdata, 4-byte for small
   code models, indirect when the symbol is global.  */
48069 int type = DW_EH_PE_sdata8;
48071 || ix86_cmodel == CM_SMALL_PIC
48072 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
48073 type = DW_EH_PE_sdata4;
48074 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
/* Non-PIC: 4-byte absolute when the code model allows it, otherwise a
   full absolute pointer.  */
48076 if (ix86_cmodel == CM_SMALL
48077 || (ix86_cmodel == CM_MEDIUM && code))
48078 return DW_EH_PE_udata4;
48079 return DW_EH_PE_absptr;
48082 /* Expand copysign from SIGN to the positive value ABS_VALUE
48083 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
/* result = abs_value | (sign & signbit_mask); when MASK is supplied it
   is already inverted (see the gen_rtx_NOT below, which applies only
   on the path that builds the mask here -- confirm in full source).  */
48086 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
48088 machine_mode mode = GET_MODE (sign);
48089 rtx sgn = gen_reg_rtx (mode);
48090 if (mask == NULL_RTX)
48092 machine_mode vmode;
48094 if (mode == SFmode)
48096 else if (mode == DFmode)
48101 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
48102 if (!VECTOR_MODE_P (mode))
48104 /* We need to generate a scalar mode mask in this case. */
48105 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
48106 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
48107 mask = gen_reg_rtx (mode);
48108 emit_insn (gen_rtx_SET (mask, tmp));
48112 mask = gen_rtx_NOT (mode, mask);
48113 emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign)));
48114 emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn)));
48117 /* Expand fabs (OP0) and return a new rtx that holds the result. The
48118 mask for masking out the sign-bit is stored in *SMASK, if that is
/* Builds a ~signbit mask for the mode and ANDs it with OP0; the return
   statement and *SMASK store are elided from this listing.  */
48121 ix86_expand_sse_fabs (rtx op0, rtx *smask)
48123 machine_mode vmode, mode = GET_MODE (op0);
48126 xa = gen_reg_rtx (mode);
48127 if (mode == SFmode)
48129 else if (mode == DFmode)
48133 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
48134 if (!VECTOR_MODE_P (mode))
48136 /* We need to generate a scalar mode mask in this case. */
48137 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
48138 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
48139 mask = gen_reg_rtx (mode);
48140 emit_insn (gen_rtx_SET (mask, tmp));
48142 emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask)));
48150 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
48151 swapping the operands if SWAP_OPERANDS is true. The expanded
48152 code is a forward jump to a newly created label in case the
48153 comparison is true. The generated label rtx is returned. */
48154 static rtx_code_label *
48155 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
48156 bool swap_operands)
48158 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
48159 rtx_code_label *label;
48163 std::swap (op0, op1);
48165 label = gen_label_rtx ();
/* Emit the compare into the flags register, then a conditional jump
   on CODE to the new label.  */
48166 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
48167 emit_insn (gen_rtx_SET (tmp, gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
48168 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
48169 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
48170 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
48171 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
48172 JUMP_LABEL (tmp) = label;
48177 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
48178 using comparison code CODE. Operands are swapped for the comparison if
48179 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
/* NOTE(review): extract omits the `static rtx` return-type line, braces,
   the swap guard and the `return mask;` — verify against full i386.c.  */
48181 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
48182 bool swap_operands)
48184 rtx (*insn)(rtx, rtx, rtx, rtx);
48185 machine_mode mode = GET_MODE (op0);
48186 rtx mask = gen_reg_rtx (mode);
48189 std::swap (op0, op1);
/* Pick the setcc expander matching the scalar FP mode.  */
48191 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
48193 emit_insn (insn (mask, op0, op1,
48194 gen_rtx_fmt_ee (code, mode, op0, op1)));
48198 /* Generate and return a rtx of mode MODE for 2**n where n is the number
48199 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
48201 ix86_gen_TWO52 (machine_mode mode)
48203 REAL_VALUE_TYPE TWO52r;
/* 2**52 for DFmode, 2**23 for SFmode: the threshold above which every
   representable value of the mode is already an integer.  */
48206 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
48207 TWO52 = const_double_from_real_value (TWO52r, mode);
/* Force into a register so the rounding expanders can reuse it.  */
48208 TWO52 = force_reg (mode, TWO52);
48213 /* Expand SSE sequence for computing lround from OP1 storing
48216 ix86_expand_lround (rtx op0, rtx op1)
48218 /* C code for the stuff we're doing below:
48219 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
48222 machine_mode mode = GET_MODE (op1);
48223 const struct real_format *fmt;
48224 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
/* nextafter (0.5, 0.0) = 0.5 - 2**(-p-1), where p is the mantissa
   precision of MODE; using the predecessor of 0.5 avoids rounding
   values exactly halfway between integers the wrong way.  */
48227 /* load nextafter (0.5, 0.0) */
48228 fmt = REAL_MODE_FORMAT (mode);
48229 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
48230 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
48232 /* adj = copysign (0.5, op1) */
48233 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
48234 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
48236 /* adj = op1 + adj */
48237 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
48239 /* op0 = (imode)adj */
/* Final truncating fix conversion writes the integer result to OP0.  */
48240 expand_fix (op0, adj, 0);
48243 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
48246 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
48248 /* C code for the stuff we're doing below (for do_floor):
48250 xi -= (double)xi > op1 ? 1 : 0;
48253 machine_mode fmode = GET_MODE (op1);
48254 machine_mode imode = GET_MODE (op0);
48255 rtx ireg, freg, tmp;
48256 rtx_code_label *label;
48258 /* reg = (long)op1 */
48259 ireg = gen_reg_rtx (imode);
48260 expand_fix (ireg, op1, 0);
48262 /* freg = (double)reg */
48263 freg = gen_reg_rtx (fmode);
48264 expand_float (freg, ireg, 0);
48266 /* ireg = (freg > op1) ? ireg - 1 : ireg */
/* Jump past the adjustment when no compensation is needed; the operands
   are swapped for the ceil case (!do_floor) so one compare serves both.  */
48267 label = ix86_expand_sse_compare_and_jump (UNLE,
48268 freg, op1, !do_floor);
/* floor subtracts 1, ceil adds 1.  */
48269 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
48270 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
48271 emit_move_insn (ireg, tmp);
48273 emit_label (label);
48274 LABEL_NUSES (label) = 1;
48276 emit_move_insn (op0, ireg);
48279 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
48280 result in OPERAND0. */
48282 ix86_expand_rint (rtx operand0, rtx operand1)
48284 /* C code for the stuff we're doing below:
48285 xa = fabs (operand1);
48286 if (!isless (xa, 2**52))
48288 xa = xa + 2**52 - 2**52;
48289 return copysign (xa, operand1);
48291 machine_mode mode = GET_MODE (operand0);
48292 rtx res, xa, TWO52, mask;
48293 rtx_code_label *label;
48295 res = gen_reg_rtx (mode);
/* RES starts as the input so the skip path already holds the answer.  */
48296 emit_move_insn (res, operand1);
48298 /* xa = abs (operand1) */
48299 xa = ix86_expand_sse_fabs (res, &mask);
48301 /* if (!isless (xa, TWO52)) goto label; */
/* Values >= 2**52 (2**23 for SF) are already integral — skip rounding.  */
48302 TWO52 = ix86_gen_TWO52 (mode);
48303 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Adding and subtracting TWO52 forces rounding to integer in the
   current (round-to-nearest) FP mode.  */
48305 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
48306 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
/* Restore the original sign (keeps -0.0 correct).  */
48308 ix86_sse_copysign_to_positive (res, xa, res, mask);
48310 emit_label (label);
48311 LABEL_NUSES (label) = 1;
48313 emit_move_insn (operand0, res);
48316 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
48319 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
48321 /* C code for the stuff we expand below.
48322 double xa = fabs (x), x2;
48323 if (!isless (xa, TWO52))
48325 xa = xa + TWO52 - TWO52;
48326 x2 = copysign (xa, x);
48335 machine_mode mode = GET_MODE (operand0);
48336 rtx xa, TWO52, tmp, one, res, mask;
48337 rtx_code_label *label;
48339 TWO52 = ix86_gen_TWO52 (mode);
48341 /* Temporary for holding the result, initialized to the input
48342 operand to ease control flow. */
48343 res = gen_reg_rtx (mode);
48344 emit_move_insn (res, operand1);
48346 /* xa = abs (operand1) */
48347 xa = ix86_expand_sse_fabs (res, &mask);
48349 /* if (!isless (xa, TWO52)) goto label; */
48350 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48352 /* xa = xa + TWO52 - TWO52; */
/* Round-to-nearest via the TWO52 add/sub trick; the compensation step
   below corrects it to floor/ceil semantics.  */
48353 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
48354 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
48356 /* xa = copysign (xa, operand1) */
48357 ix86_sse_copysign_to_positive (xa, xa, res, mask);
48359 /* generate 1.0 or -1.0 */
/* -1.0 for ceil: subtracting -1 adds 1, so a single MINUS serves both.  */
48360 one = force_reg (mode,
48361 const_double_from_real_value (do_floor
48362 ? dconst1 : dconstm1, mode));
48364 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
48365 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
48366 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
48367 /* We always need to subtract here to preserve signed zero. */
48368 tmp = expand_simple_binop (mode, MINUS,
48369 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
48370 emit_move_insn (res, tmp);
48372 emit_label (label);
48373 LABEL_NUSES (label) = 1;
48375 emit_move_insn (operand0, res);
48378 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
48381 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
48383 /* C code for the stuff we expand below.
48384 double xa = fabs (x), x2;
48385 if (!isless (xa, TWO52))
48387 x2 = (double)(long)x;
48394 if (HONOR_SIGNED_ZEROS (mode))
48395 return copysign (x2, x);
48398 machine_mode mode = GET_MODE (operand0);
48399 rtx xa, xi, TWO52, tmp, one, res, mask;
48400 rtx_code_label *label;
48402 TWO52 = ix86_gen_TWO52 (mode);
48404 /* Temporary for holding the result, initialized to the input
48405 operand to ease control flow. */
48406 res = gen_reg_rtx (mode);
48407 emit_move_insn (res, operand1);
48409 /* xa = abs (operand1) */
48410 xa = ix86_expand_sse_fabs (res, &mask);
48412 /* if (!isless (xa, TWO52)) goto label; */
48413 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48415 /* xa = (double)(long)x */
/* Truncate toward zero via the integer round trip; this variant relies
   on DImode cvttsd2siq, hence a 64-bit integer for DFmode.  */
48416 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
48417 expand_fix (xi, res, 0);
48418 expand_float (xa, xi, 0);
48421 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
48423 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
/* Truncation rounds toward zero, so floor must subtract 1 when the
   result overshot, and ceil add 1 when it undershot.  */
48424 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
48425 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
48426 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
48427 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
48428 emit_move_insn (res, tmp);
/* Re-apply the sign only when -0.0 must be preserved.  */
48430 if (HONOR_SIGNED_ZEROS (mode))
48431 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
48433 emit_label (label);
48434 LABEL_NUSES (label) = 1;
48436 emit_move_insn (operand0, res);
48439 /* Expand SSE sequence for computing round from OPERAND1 storing
48440 into OPERAND0. Sequence that works without relying on DImode truncation
48441 via cvttsd2siq that is only available on 64bit targets. */
48443 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
48445 /* C code for the stuff we expand below.
48446 double xa = fabs (x), xa2, x2;
48447 if (!isless (xa, TWO52))
48449 Using the absolute value and copying back sign makes
48450 -0.0 -> -0.0 correct.
48451 xa2 = xa + TWO52 - TWO52;
48456 else if (dxa > 0.5)
48458 x2 = copysign (xa2, x);
48461 machine_mode mode = GET_MODE (operand0);
48462 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
48463 rtx_code_label *label;
48465 TWO52 = ix86_gen_TWO52 (mode);
48467 /* Temporary for holding the result, initialized to the input
48468 operand to ease control flow. */
48469 res = gen_reg_rtx (mode);
48470 emit_move_insn (res, operand1);
48472 /* xa = abs (operand1) */
48473 xa = ix86_expand_sse_fabs (res, &mask);
48475 /* if (!isless (xa, TWO52)) goto label; */
48476 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48478 /* xa2 = xa + TWO52 - TWO52; */
/* Round-to-nearest-even via the TWO52 trick; the two compensations
   below convert that into round-half-away-from-zero.  */
48479 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
48480 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
48482 /* dxa = xa2 - xa; */
48483 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
48485 /* generate 0.5, 1.0 and -0.5 */
/* Derive 1.0 and -0.5 from 0.5 arithmetically to save constant loads.  */
48486 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
48487 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
48488 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
48492 tmp = gen_reg_rtx (mode);
48493 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
48494 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
48495 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
48496 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
48497 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
48498 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
48499 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
48500 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
48502 /* res = copysign (xa2, operand1) */
48503 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
48505 emit_label (label);
48506 LABEL_NUSES (label) = 1;
48508 emit_move_insn (operand0, res);
48511 /* Expand SSE sequence for computing trunc from OPERAND1 storing
48514 ix86_expand_trunc (rtx operand0, rtx operand1)
48516 /* C code for SSE variant we expand below.
48517 double xa = fabs (x), x2;
48518 if (!isless (xa, TWO52))
48520 x2 = (double)(long)x;
48521 if (HONOR_SIGNED_ZEROS (mode))
48522 return copysign (x2, x);
48525 machine_mode mode = GET_MODE (operand0);
48526 rtx xa, xi, TWO52, res, mask;
48527 rtx_code_label *label;
48529 TWO52 = ix86_gen_TWO52 (mode);
48531 /* Temporary for holding the result, initialized to the input
48532 operand to ease control flow. */
48533 res = gen_reg_rtx (mode);
48534 emit_move_insn (res, operand1);
48536 /* xa = abs (operand1) */
48537 xa = ix86_expand_sse_fabs (res, &mask);
48539 /* if (!isless (xa, TWO52)) goto label; */
/* Large magnitudes are already integral; skip the conversion.  */
48540 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48542 /* x = (double)(long)x */
/* Truncation toward zero is exactly the fix/float round trip — no
   compensation step needed, unlike floor/ceil.  */
48543 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
48544 expand_fix (xi, res, 0);
48545 expand_float (res, xi, 0);
48547 if (HONOR_SIGNED_ZEROS (mode))
48548 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
48550 emit_label (label);
48551 LABEL_NUSES (label) = 1;
48553 emit_move_insn (operand0, res);
48556 /* Expand SSE sequence for computing trunc from OPERAND1 storing
48559 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
48561 machine_mode mode = GET_MODE (operand0);
48562 rtx xa, mask, TWO52, one, res, smask, tmp;
48563 rtx_code_label *label;
48565 /* C code for SSE variant we expand below.
48566 double xa = fabs (x), x2;
48567 if (!isless (xa, TWO52))
48569 xa2 = xa + TWO52 - TWO52;
48573 x2 = copysign (xa2, x);
48577 TWO52 = ix86_gen_TWO52 (mode);
48579 /* Temporary for holding the result, initialized to the input
48580 operand to ease control flow. */
48581 res = gen_reg_rtx (mode);
48582 emit_move_insn (res, operand1);
48584 /* xa = abs (operand1) */
48585 xa = ix86_expand_sse_fabs (res, &smask);
48587 /* if (!isless (xa, TWO52)) goto label; */
48588 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48590 /* res = xa + TWO52 - TWO52; */
/* 32-bit-safe variant: no DImode cvttsd2siq available, so round via the
   TWO52 trick and compensate below instead of an integer round trip.  */
48591 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
48592 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
48593 emit_move_insn (res, tmp);
48596 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
48598 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
/* Round-to-nearest may overshoot |x|; subtract 1 to get truncation.  */
48599 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
48600 emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one)));
48601 tmp = expand_simple_binop (mode, MINUS,
48602 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
48603 emit_move_insn (res, tmp);
48605 /* res = copysign (res, operand1) */
48606 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
48608 emit_label (label);
48609 LABEL_NUSES (label) = 1;
48611 emit_move_insn (operand0, res);
48614 /* Expand SSE sequence for computing round from OPERAND1 storing
48617 ix86_expand_round (rtx operand0, rtx operand1)
48619 /* C code for the stuff we're doing below:
48620 double xa = fabs (x);
48621 if (!isless (xa, TWO52))
48623 xa = (double)(long)(xa + nextafter (0.5, 0.0));
48624 return copysign (xa, x);
48626 machine_mode mode = GET_MODE (operand0);
48627 rtx res, TWO52, xa, xi, half, mask;
48628 rtx_code_label *label;
48629 const struct real_format *fmt;
48630 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
48632 /* Temporary for holding the result, initialized to the input
48633 operand to ease control flow. */
48634 res = gen_reg_rtx (mode);
48635 emit_move_insn (res, operand1);
48637 TWO52 = ix86_gen_TWO52 (mode);
48638 xa = ix86_expand_sse_fabs (res, &mask);
48639 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48641 /* load nextafter (0.5, 0.0) */
/* Use the FP predecessor of 0.5 (0.5 - 2**(-p-1)) so that exact .5
   ties round away from zero rather than being perturbed upward.  */
48642 fmt = REAL_MODE_FORMAT (mode);
48643 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
48644 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
48646 /* xa = xa + 0.5 */
48647 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
48648 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
48650 /* xa = (double)(int64_t)xa */
48651 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
48652 expand_fix (xi, xa, 0);
48653 expand_float (xa, xi, 0);
48655 /* res = copysign (xa, operand1) */
48656 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
48658 emit_label (label);
48659 LABEL_NUSES (label) = 1;
48661 emit_move_insn (operand0, res);
48664 /* Expand SSE sequence for computing round
48665 from OP1 storing into OP0 using sse4 round insn. */
48667 ix86_expand_round_sse4 (rtx op0, rtx op1)
48669 machine_mode mode = GET_MODE (op0);
48670 rtx e1, e2, res, half;
48671 const struct real_format *fmt;
48672 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
48673 rtx (*gen_copysign) (rtx, rtx, rtx);
48674 rtx (*gen_round) (rtx, rtx, rtx);
/* NOTE(review): extract omits the switch skeleton selecting on MODE
   (SFmode vs DFmode cases) around the assignments below.  */
48679 gen_copysign = gen_copysignsf3;
48680 gen_round = gen_sse4_1_roundsf2;
48683 gen_copysign = gen_copysigndf3;
48684 gen_round = gen_sse4_1_rounddf2;
48687 gcc_unreachable ();
48690 /* round (a) = trunc (a + copysign (0.5, a)) */
48692 /* load nextafter (0.5, 0.0) */
/* Predecessor of 0.5 keeps exact halfway cases rounding away from zero.  */
48693 fmt = REAL_MODE_FORMAT (mode);
48694 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
48695 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
48696 half = const_double_from_real_value (pred_half, mode);
48698 /* e1 = copysign (0.5, op1) */
48699 e1 = gen_reg_rtx (mode);
48700 emit_insn (gen_copysign (e1, half, op1));
48702 /* e2 = op1 + e1 */
48703 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
48705 /* res = trunc (e2) */
/* ROUND_TRUNC selects the truncation rounding mode of roundss/roundsd.  */
48706 res = gen_reg_rtx (mode);
48707 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
48709 emit_move_insn (op0, res);
48713 /* Table of valid machine attributes. */
/* NOTE(review): in this extract several entries are missing their trailing
   affects_type_identity field and closing brace — they appear on the
   omitted following lines.  Verify field counts against full i386.c.  */
48714 static const struct attribute_spec ix86_attribute_table[] =
48716 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
48717 affects_type_identity } */
48718 /* Stdcall attribute says callee is responsible for popping arguments
48719 if they are not variable. */
48720 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48722 /* Fastcall attribute says callee is responsible for popping arguments
48723 if they are not variable. */
48724 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48726 /* Thiscall attribute says callee is responsible for popping arguments
48727 if they are not variable. */
48728 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48730 /* Cdecl attribute says the callee is a normal C declaration */
48731 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48733 /* Regparm attribute specifies how many integer arguments are to be
48734 passed in registers. */
48735 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
48737 /* Sseregparm attribute says we are using x86_64 calling conventions
48738 for FP arguments. */
48739 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48741 /* The transactional memory builtins are implicitly regparm or fastcall
48742 depending on the ABI. Override the generic do-nothing attribute that
48743 these builtins were declared with. */
48744 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
48746 /* force_align_arg_pointer says this function realigns the stack at entry. */
48747 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
48748 false, true, true, ix86_handle_force_align_arg_pointer_attribute, false },
/* Windows-only DLL import/export attributes.  */
48749 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
48750 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
48751 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
48752 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
48755 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
48757 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
48759 #ifdef SUBTARGET_ATTRIBUTE_TABLE
48760 SUBTARGET_ATTRIBUTE_TABLE,
48762 /* ms_abi and sysv_abi calling convention function attributes. */
48763 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
48764 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
48765 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
48767 { "callee_pop_aggregate_return", 1, 1, false, true, true,
48768 ix86_handle_callee_pop_aggregate_return, true },
/* Sentinel entry — terminates the table scan.  */
48770 { NULL, 0, 0, false, false, false, NULL, false }
48773 /* Implement targetm.vectorize.builtin_vectorization_cost. */
/* NOTE(review): extract omits the signature's remaining parameters
   (vectype, misalign per the target hook) and several case labels of
   the switch — verify against full i386.c.  */
48775 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
/* Each case maps a vectorizer cost kind onto the active processor cost
   table (ix86_cost).  */
48780 switch (type_of_cost)
48783 return ix86_cost->scalar_stmt_cost;
48786 return ix86_cost->scalar_load_cost;
48789 return ix86_cost->scalar_store_cost;
48792 return ix86_cost->vec_stmt_cost;
48795 return ix86_cost->vec_align_load_cost;
48798 return ix86_cost->vec_store_cost;
48800 case vec_to_scalar:
48801 return ix86_cost->vec_to_scalar_cost;
48803 case scalar_to_vec:
48804 return ix86_cost->scalar_to_vec_cost;
/* Unaligned loads and stores share one cost entry.  */
48806 case unaligned_load:
48807 case unaligned_store:
48808 return ix86_cost->vec_unalign_load_cost;
48810 case cond_branch_taken:
48811 return ix86_cost->cond_taken_branch_cost;
48813 case cond_branch_not_taken:
48814 return ix86_cost->cond_not_taken_branch_cost;
48817 case vec_promote_demote:
48818 return ix86_cost->vec_stmt_cost;
/* Building a vector from scalars: roughly one insn per element pair.  */
48820 case vec_construct:
48821 elements = TYPE_VECTOR_SUBPARTS (vectype);
48822 return ix86_cost->vec_stmt_cost * (elements / 2 + 1);
48825 gcc_unreachable ();
48829 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
48830 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
48831 insn every time. */
/* GTY(()) roots the cached insn for the garbage collector.  */
48833 static GTY(()) rtx_insn *vselect_insn;
48835 /* Initialize vselect_insn. */
48838 init_vselect_insn (void)
/* Build a maximal-size PARALLEL of placeholder const0_rtx entries;
   expand_vselect later shrinks it (PUT_NUM_ELEM) and rewrites the
   operands in place.  */
48843 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
48844 for (i = 0; i < MAX_VECT_LEN; ++i)
48845 XVECEXP (x, 0, i) = const0_rtx;
48846 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
48848 x = gen_rtx_SET (const0_rtx, x);
/* NOTE(review): extract omits lines here (e.g. start_sequence/end_sequence
   around emit_insn in upstream) — confirm before editing.  */
48850 vselect_insn = emit_insn (x);
48854 /* Construct (set target (vec_select op0 (parallel perm))) and
48855 return true if that's a valid instruction in the active ISA. */
48858 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
48859 unsigned nelt, bool testing_p)
48862 rtx x, save_vconcat;
48865 if (vselect_insn == NULL_RTX)
48866 init_vselect_insn ();
/* Rewrite the cached insn in place: shrink the PARALLEL to NELT entries
   and fill in the permutation indices.  */
48868 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
48869 PUT_NUM_ELEM (XVEC (x, 0), nelt);
48870 for (i = 0; i < nelt; ++i)
48871 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
/* Save the placeholder vconcat so it can be restored afterwards.  */
48872 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
48873 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
48874 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
48875 SET_DEST (PATTERN (vselect_insn)) = target;
/* recog_memoized >= 0 iff some pattern in the active ISA matches.  */
48876 icode = recog_memoized (vselect_insn);
48878 if (icode >= 0 && !testing_p)
48879 emit_insn (copy_rtx (PATTERN (vselect_insn)));
/* Restore the cached insn to its neutral state for the next caller.  */
48881 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
48882 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
48883 INSN_CODE (vselect_insn) = -1;
48888 /* Similar, but generate a vec_concat from op0 and op1 as well. */
48891 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
48892 const unsigned char *perm, unsigned nelt,
48895 machine_mode v2mode;
48899 if (vselect_insn == NULL_RTX)
48900 init_vselect_insn ();
/* The concatenated operand lives in the mode twice as wide as OP0's.  */
48902 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
/* Reuse the cached insn's VEC_CONCAT node, patching in op0/op1
   (the assignments of XEXP (x, 0/1) are on omitted lines).  */
48903 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
48904 PUT_MODE (x, v2mode);
48907 ok = expand_vselect (target, x, perm, nelt, testing_p);
/* Reset the placeholders so the cached insn stays neutral.  */
48908 XEXP (x, 0) = const0_rtx;
48909 XEXP (x, 1) = const0_rtx;
48913 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
48914 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
/* NOTE(review): extract omits many lines (early-return `return false;`
   statements, case labels of the mode switch, mmode selection) — verify
   control flow against full i386.c before editing.  */
48917 expand_vec_perm_blend (struct expand_vec_perm_d *d)
48919 machine_mode mmode, vmode = d->vmode;
48920 unsigned i, mask, nelt = d->nelt;
48921 rtx target, op0, op1, maskop, x;
48922 rtx rperm[32], vperm;
/* Blends need two operands; ISA gates select which blend family applies.  */
48924 if (d->one_operand_p)
48926 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
48927 && (TARGET_AVX512BW
48928 || GET_MODE_UNIT_SIZE (vmode) >= 4))
48930 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
48932 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
48934 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
48939 /* This is a blend, not a permute. Elements must stay in their
48940 respective lanes. */
48941 for (i = 0; i < nelt; ++i)
48943 unsigned e = d->perm[i];
48944 if (!(e == i || e == i + nelt))
48951 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
48952 decision should be extracted elsewhere, so that we only try that
48953 sequence once all budget==3 options have been tried. */
48954 target = d->target;
/* Build the immediate blend mask: bit i set selects op1's element i.  */
48973 for (i = 0; i < nelt; ++i)
48974 mask |= (d->perm[i] >= nelt) << i;
/* V2DF/V2DI handled as a 4-bit mask over SF halves.  */
48978 for (i = 0; i < 2; ++i)
48979 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
48984 for (i = 0; i < 4; ++i)
48985 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
48990 /* See if bytes move in pairs so we can use pblendw with
48991 an immediate argument, rather than pblendvb with a vector
48993 for (i = 0; i < 16; i += 2)
48994 if (d->perm[i] + 1 != d->perm[i + 1])
/* Variable blend: build a constant selector vector of 0/-1 per element.  */
48997 for (i = 0; i < nelt; ++i)
48998 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
49001 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
49002 vperm = force_reg (vmode, vperm);
49004 if (GET_MODE_SIZE (vmode) == 16)
49005 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
49007 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
49008 if (target != d->target)
49009 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
49013 for (i = 0; i < 8; ++i)
49014 mask |= (d->perm[i * 2] >= 16) << i;
49019 target = gen_reg_rtx (vmode);
49020 op0 = gen_lowpart (vmode, op0);
49021 op1 = gen_lowpart (vmode, op1);
49025 /* See if bytes move in pairs. If not, vpblendvb must be used. */
49026 for (i = 0; i < 32; i += 2)
49027 if (d->perm[i] + 1 != d->perm[i + 1])
49029 /* See if bytes move in quadruplets. If yes, vpblendd
49030 with immediate can be used. */
49031 for (i = 0; i < 32; i += 4)
49032 if (d->perm[i] + 2 != d->perm[i + 2])
49036 /* See if bytes move the same in both lanes. If yes,
49037 vpblendw with immediate can be used. */
49038 for (i = 0; i < 16; i += 2)
49039 if (d->perm[i] + 16 != d->perm[i + 16])
49042 /* Use vpblendw. */
49043 for (i = 0; i < 16; ++i)
49044 mask |= (d->perm[i * 2] >= 32) << i;
49049 /* Use vpblendd. */
49050 for (i = 0; i < 8; ++i)
49051 mask |= (d->perm[i * 4] >= 32) << i;
49056 /* See if words move in pairs. If yes, vpblendd can be used. */
49057 for (i = 0; i < 16; i += 2)
49058 if (d->perm[i] + 1 != d->perm[i + 1])
49062 /* See if words move the same in both lanes. If not,
49063 vpblendvb must be used. */
49064 for (i = 0; i < 8; i++)
49065 if (d->perm[i] + 8 != d->perm[i + 8])
49067 /* Use vpblendvb. */
49068 for (i = 0; i < 32; ++i)
49069 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
49073 target = gen_reg_rtx (vmode);
49074 op0 = gen_lowpart (vmode, op0);
49075 op1 = gen_lowpart (vmode, op1);
49076 goto finish_pblendvb;
49079 /* Use vpblendw. */
49080 for (i = 0; i < 16; ++i)
49081 mask |= (d->perm[i] >= 16) << i;
49085 /* Use vpblendd. */
49086 for (i = 0; i < 8; ++i)
49087 mask |= (d->perm[i * 2] >= 16) << i;
49092 /* Use vpblendd. */
49093 for (i = 0; i < 4; ++i)
49094 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
49099 gcc_unreachable ();
/* AVX512 mask-register blends need the mask forced into MMODE;
   immediate blends pass it as a CONST_INT.  */
49122 if (mmode != VOIDmode)
49123 maskop = force_reg (mmode, gen_int_mode (mask, mmode));
49125 maskop = GEN_INT (mask);
49127 /* This matches five different patterns with the different modes. */
49128 x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
49129 x = gen_rtx_SET (target, x);
49131 if (target != d->target)
49132 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
49137 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
49138 in terms of the variable form of vpermilps.
49140 Note that we will have already failed the immediate input vpermilps,
49141 which requires that the high and low part shuffle be identical; the
49142 variable form doesn't require that. */
49145 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
49147 rtx rperm[8], vperm;
/* Only the one-operand V8SF case is handled by this routine.  */
49150 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
49153 /* We can only permute within the 128-bit lane. */
49154 for (i = 0; i < 8; ++i)
49156 unsigned e = d->perm[i];
49157 if (i < 4 ? e >= 4 : e < 4)
/* NOTE(review): extract omits the `return false;` bodies of the guards
   above and the testing_p early-out — verify against full i386.c.  */
49164 for (i = 0; i < 8; ++i)
49166 unsigned e = d->perm[i];
49168 /* Within each 128-bit lane, the elements of op0 are numbered
49169 from 0 and the elements of op1 are numbered from 4. */
49175 rperm[i] = GEN_INT (e);
/* Build the variable selector and emit vpermilps.  */
49178 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
49179 vperm = force_reg (V8SImode, vperm);
49180 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
49185 /* Return true if permutation D can be performed as VMODE permutation
49189 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
49191 unsigned int i, j, chunk;
/* Both modes must be integer vectors of the same total size.  */
49193 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
49194 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
49195 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
/* VMODE must be strictly wider-elemented than D's mode.  */
49198 if (GET_MODE_NUNITS (vmode) >= d->nelt)
/* The permutation is expressible in VMODE iff each CHUNK-sized group of
   indices is aligned to a chunk boundary and consecutive within it.  */
49201 chunk = d->nelt / GET_MODE_NUNITS (vmode);
49202 for (i = 0; i < d->nelt; i += chunk)
49203 if (d->perm[i] & (chunk - 1))
49206 for (j = 1; j < chunk; ++j)
49207 if (d->perm[i] + j != d->perm[i + j])
49213 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
49214 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
/* NOTE(review): extract omits many lines (`return false;` bodies,
   `if (d->testing_p)` early-outs, vmode assignments in several branches)
   — verify control flow against full i386.c before editing.  */
49217 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
49219 unsigned i, nelt, eltsz, mask;
49220 unsigned char perm[64];
49221 machine_mode vmode = V16QImode;
49222 rtx rperm[64], vperm, target, op0, op1;
/* Two-operand permutes need XOP vpperm (16-byte vectors) or, for the
   128-bit-lane-granular case, AVX2 vperm2i128.  */
49226 if (!d->one_operand_p)
49228 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
49231 && valid_perm_using_mode_p (V2TImode, d))
49236 /* Use vperm2i128 insn. The pattern uses
49237 V4DImode instead of V2TImode. */
49238 target = d->target;
49239 if (d->vmode != V4DImode)
49240 target = gen_reg_rtx (V4DImode)
49241 op0 = gen_lowpart (V4DImode, d->op0);
49242 op1 = gen_lowpart (V4DImode, d->op1);
/* Immediate encodes which 128-bit half of each source lands where.  */
49244 = GEN_INT ((d->perm[0] / (nelt / 2))
49245 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
49246 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
49247 if (target != d->target)
49248 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
49256 if (GET_MODE_SIZE (d->vmode) == 16)
49261 else if (GET_MODE_SIZE (d->vmode) == 32)
49266 /* V4DImode should be already handled through
49267 expand_vselect by vpermq instruction. */
49268 gcc_assert (d->vmode != V4DImode);
49271 if (d->vmode == V8SImode
49272 || d->vmode == V16HImode
49273 || d->vmode == V32QImode)
49275 /* First see if vpermq can be used for
49276 V8SImode/V16HImode/V32QImode. */
49277 if (valid_perm_using_mode_p (V4DImode, d))
/* Reduce the permutation to four 64-bit element indices.  */
49279 for (i = 0; i < 4; i++)
49280 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
49283 target = gen_reg_rtx (V4DImode);
49284 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
49287 emit_move_insn (d->target,
49288 gen_lowpart (d->vmode, target));
49294 /* Next see if vpermd can be used. */
49295 if (valid_perm_using_mode_p (V8SImode, d))
49298 /* Or if vpermps can be used. */
49299 else if (d->vmode == V8SFmode)
49302 if (vmode == V32QImode)
49304 /* vpshufb only works intra lanes, it is not
49305 possible to shuffle bytes in between the lanes. */
49306 for (i = 0; i < nelt; ++i)
49307 if ((d->perm[i] ^ i) & (nelt / 2))
49311 else if (GET_MODE_SIZE (d->vmode) == 64)
49313 if (!TARGET_AVX512BW)
49316 /* If vpermq didn't work, vpshufb won't work either. */
49317 if (d->vmode == V8DFmode || d->vmode == V8DImode)
49321 if (d->vmode == V16SImode
49322 || d->vmode == V32HImode
49323 || d->vmode == V64QImode)
49325 /* First see if vpermq can be used for
49326 V16SImode/V32HImode/V64QImode. */
49327 if (valid_perm_using_mode_p (V8DImode, d))
49329 for (i = 0; i < 8; i++)
49330 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
49333 target = gen_reg_rtx (V8DImode);
49334 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
49337 emit_move_insn (d->target,
49338 gen_lowpart (d->vmode, target));
49344 /* Next see if vpermd can be used. */
49345 if (valid_perm_using_mode_p (V16SImode, d))
49348 /* Or if vpermps can be used. */
49349 else if (d->vmode == V16SFmode)
49351 if (vmode == V64QImode)
49353 /* vpshufb only works intra lanes, it is not
49354 possible to shuffle bytes in between the lanes. */
49355 for (i = 0; i < nelt; ++i)
49356 if ((d->perm[i] ^ i) & (nelt / 4))
/* Dword-granular selectors for vpermd/vpermps.  */
49367 if (vmode == V8SImode)
49368 for (i = 0; i < 8; ++i)
49369 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
49370 else if (vmode == V16SImode)
49371 for (i = 0; i < 16; ++i)
49372 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
/* Byte-granular pshufb selector: replicate each element index across
   its ELTSZ bytes; MASK limits indices to the addressable lane.  */
49375 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
49376 if (!d->one_operand_p)
49377 mask = 2 * nelt - 1;
49378 else if (vmode == V16QImode)
49380 else if (vmode == V64QImode)
49381 mask = nelt / 4 - 1;
49383 mask = nelt / 2 - 1;
49385 for (i = 0; i < nelt; ++i)
49387 unsigned j, e = d->perm[i] & mask;
49388 for (j = 0; j < eltsz; ++j)
49389 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
49393 vperm = gen_rtx_CONST_VECTOR (vmode,
49394 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
49395 vperm = force_reg (vmode, vperm);
49397 target = d->target;
49398 if (d->vmode != vmode)
49399 target = gen_reg_rtx (vmode);
49400 op0 = gen_lowpart (vmode, d->op0);
/* Dispatch to the concrete insn by selector mode.  */
49401 if (d->one_operand_p)
49403 if (vmode == V16QImode)
49404 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
49405 else if (vmode == V32QImode)
49406 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
49407 else if (vmode == V64QImode)
49408 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
49409 else if (vmode == V8SFmode)
49410 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
49411 else if (vmode == V8SImode)
49412 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
49413 else if (vmode == V16SFmode)
49414 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
49415 else if (vmode == V16SImode)
49416 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
49418 gcc_unreachable ();
/* Two-operand case: XOP vpperm selects from the op0/op1 pair.  */
49422 op1 = gen_lowpart (vmode, d->op1);
49423 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
49425 if (target != d->target)
49426 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
49431 /* For V*[QHS]Imode permutations, check if the same permutation
49432 can't be performed in a 2x, 4x or 8x wider inner mode. */
/* NOTE(review): this extract elides structural lines (function head,
   braces, returns); code lines below are kept byte-identical.  */
49435 canonicalize_vector_int_perm (const struct expand_vec_perm_d *d,
49436 struct expand_vec_perm_d *nd)
/* Map each narrow integer vector mode to the 2x wider-element mode of
   the same total size; modes not listed cannot be widened.  */
49439 enum machine_mode mode = VOIDmode;
49443 case V16QImode: mode = V8HImode; break;
49444 case V32QImode: mode = V16HImode; break;
49445 case V64QImode: mode = V32HImode; break;
49446 case V8HImode: mode = V4SImode; break;
49447 case V16HImode: mode = V8SImode; break;
49448 case V32HImode: mode = V16SImode; break;
49449 case V4SImode: mode = V2DImode; break;
49450 case V8SImode: mode = V4DImode; break;
49451 case V16SImode: mode = V8DImode; break;
49452 default: return false;
/* Widening is legal only when each index pair selects an even-aligned
   pair of adjacent elements (perm[i] even, perm[i+1] == perm[i] + 1).  */
49454 for (i = 0; i < d->nelt; i += 2)
49455 if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1)
/* Build the halved permutation in *ND.  */
49458 nd->nelt = d->nelt / 2;
49459 for (i = 0; i < nd->nelt; i++)
49460 nd->perm[i] = d->perm[2 * i] / 2;
/* Recurse: the halved permutation may widen further until the element
   mode reaches DImode.  */
49461 if (GET_MODE_INNER (mode) != DImode)
49462 canonicalize_vector_int_perm (nd, nd);
49465 nd->one_operand_p = d->one_operand_p;
49466 nd->testing_p = d->testing_p;
49467 if (d->op0 == d->op1)
49468 nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0);
49471 nd->op0 = gen_lowpart (nd->vmode, d->op0);
49472 nd->op1 = gen_lowpart (nd->vmode, d->op1);
/* When only testing, use a dummy raw REG so no pseudo is allocated;
   otherwise create a fresh pseudo for the widened result.  */
49475 nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1);
49477 nd->target = gen_reg_rtx (nd->vmode);
49482 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
49483 in a single instruction. */
/* NOTE(review): elided extract — intermediate braces/returns are not
   visible here; code lines are kept byte-identical.  */
49486 expand_vec_perm_1 (struct expand_vec_perm_d *d)
49488 unsigned i, nelt = d->nelt;
49489 struct expand_vec_perm_d nd;
49491 /* Check plain VEC_SELECT first, because AVX has instructions that could
49492 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
49493 input where SEL+CONCAT may not. */
49494 if (d->one_operand_p)
/* Classify the one-operand permutation: identity (perm[i] == i for all
   i) or broadcast (all indices equal), after masking off the
   operand-select bit.  */
49496 int mask = nelt - 1;
49497 bool identity_perm = true;
49498 bool broadcast_perm = true;
49500 for (i = 0; i < nelt; i++)
49502 nd.perm[i] = d->perm[i] & mask;
49503 if (nd.perm[i] != i)
49504 identity_perm = false;
49506 broadcast_perm = false;
/* Identity: a plain move suffices.  */
49512 emit_move_insn (d->target, d->op0);
49515 else if (broadcast_perm && TARGET_AVX2)
49517 /* Use vpbroadcast{b,w,d}. */
/* Pick the broadcast expander for the vector mode; AVX-512 variants
   are preferred when the corresponding ISA is available.  */
49518 rtx (*gen) (rtx, rtx) = NULL;
49522 if (TARGET_AVX512BW)
49523 gen = gen_avx512bw_vec_dupv64qi_1;
49526 gen = gen_avx2_pbroadcastv32qi_1;
49529 if (TARGET_AVX512BW)
49530 gen = gen_avx512bw_vec_dupv32hi_1;
49533 gen = gen_avx2_pbroadcastv16hi_1;
49536 if (TARGET_AVX512F)
49537 gen = gen_avx512f_vec_dupv16si_1;
49540 gen = gen_avx2_pbroadcastv8si_1;
49543 gen = gen_avx2_pbroadcastv16qi;
49546 gen = gen_avx2_pbroadcastv8hi;
49549 if (TARGET_AVX512F)
49550 gen = gen_avx512f_vec_dupv16sf_1;
49553 gen = gen_avx2_vec_dupv8sf_1;
49556 if (TARGET_AVX512F)
49557 gen = gen_avx512f_vec_dupv8df_1;
49560 if (TARGET_AVX512F)
49561 gen = gen_avx512f_vec_dupv8di_1;
49563 /* For other modes prefer other shuffles this function creates. */
49569 emit_insn (gen (d->target, d->op0));
/* General one-operand case: try a plain VEC_SELECT.  */
49574 if (expand_vselect (d->target, d->op0, nd.perm, nelt, d->testing_p))
49577 /* There are plenty of patterns in sse.md that are written for
49578 SEL+CONCAT and are not replicated for a single op. Perhaps
49579 that should be changed, to avoid the nastiness here. */
49581 /* Recognize interleave style patterns, which means incrementing
49582 every other permutation operand. */
49583 for (i = 0; i < nelt; i += 2)
49585 nd.perm[i] = d->perm[i] & mask;
49586 nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt;
49588 if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
49592 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
49595 for (i = 0; i < nelt; i += 4)
49597 nd.perm[i + 0] = d->perm[i + 0] & mask;
49598 nd.perm[i + 1] = d->perm[i + 1] & mask;
49599 nd.perm[i + 2] = (d->perm[i + 2] & mask) + nelt;
49600 nd.perm[i + 3] = (d->perm[i + 3] & mask) + nelt;
49603 if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
49609 /* Finally, try the fully general two operand permute. */
49610 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
49614 /* Recognize interleave style patterns with reversed operands. */
49615 if (!d->one_operand_p)
49617 for (i = 0; i < nelt; ++i)
49619 unsigned e = d->perm[i];
49627 if (expand_vselect_vconcat (d->target, d->op1, d->op0, nd.perm, nelt,
49632 /* Try the SSE4.1 blend variable merge instructions. */
49633 if (expand_vec_perm_blend (d))
49636 /* Try one of the AVX vpermil variable permutations. */
49637 if (expand_vec_perm_vpermil (d))
49640 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
49641 vpshufb, vpermd, vpermps or vpermq variable permutation. */
49642 if (expand_vec_perm_pshufb (d))
49645 /* Try the AVX2 vpalignr instruction. */
49646 if (expand_vec_perm_palignr (d, true))
49649 /* Try the AVX512F vpermi2 instructions. */
49650 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
49653 /* See if we can get the same permutation in different vector integer
49655 if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
/* The widened recursion succeeded; copy its result back in D's mode.  */
49658 emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
49664 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
49665 in terms of a pair of pshuflw + pshufhw instructions. */
/* NOTE(review): elided extract; code lines are kept byte-identical.  */
49668 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
49670 unsigned char perm2[MAX_VECT_LEN];
/* Only a one-operand V8HImode permutation can use pshuflw/pshufhw.  */
49674 if (d->vmode != V8HImode || !d->one_operand_p)
49677 /* The two permutations only operate in 64-bit lanes. */
49678 for (i = 0; i < 4; ++i)
49679 if (d->perm[i] >= 4)
49681 for (i = 4; i < 8; ++i)
49682 if (d->perm[i] < 4)
49688 /* Emit the pshuflw. */
/* Low half takes the requested indices; high half is left in place.  */
49689 memcpy (perm2, d->perm, 4);
49690 for (i = 4; i < 8; ++i)
49692 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
49695 /* Emit the pshufhw. */
/* Second pass: high half takes the requested indices, low half kept.  */
49696 memcpy (perm2 + 4, d->perm + 4, 4);
49697 for (i = 0; i < 4; ++i)
49699 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
49705 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
49706 the permutation using the SSSE3 palignr instruction. This succeeds
49707 when all of the elements in PERM fit within one vector and we merely
49708 need to shift them down so that a single vector permutation has a
49709 chance to succeed. If SINGLE_INSN_ONLY_P, succeed if only
49710 the vpalignr instruction itself can perform the requested permutation. */
/* NOTE(review): elided extract; code lines are kept byte-identical.  */
49713 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
49715 unsigned i, nelt = d->nelt;
49716 unsigned min, max, minswap, maxswap;
49717 bool in_order, ok, swap = false;
49719 struct expand_vec_perm_d dcopy;
49721 /* Even with AVX, palignr only operates on 128-bit vectors,
49722 in AVX2 palignr operates on both 128-bit lanes. */
49723 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
49724 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
/* Track the index range both as given (min/max) and with the operands
   swapped (minswap/maxswap) to see which ordering fits one vector.  */
49729 minswap = 2 * nelt;
49731 for (i = 0; i < nelt; ++i)
49733 unsigned e = d->perm[i];
49734 unsigned eswap = d->perm[i] ^ nelt;
49735 if (GET_MODE_SIZE (d->vmode) == 32)
/* For 32-byte vectors fold per-lane: palignr works within lanes.  */
49737 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
49738 eswap = e ^ (nelt / 2);
49744 if (eswap < minswap)
49746 if (eswap > maxswap)
49750 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
49752 if (d->one_operand_p
49754 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
49755 ? nelt / 2 : nelt))
49762 /* Given that we have SSSE3, we know we'll be able to implement the
49763 single operand permutation after the palignr with pshufb for
49764 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
49766 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
/* Swap the operands so the selected range lives in one vector.  */
49772 dcopy.op0 = d->op1;
49773 dcopy.op1 = d->op0;
49774 for (i = 0; i < nelt; ++i)
49775 dcopy.perm[i] ^= nelt;
/* Rebias the indices by MIN and check whether the result is already
   the identity (palignr alone suffices).  */
49779 for (i = 0; i < nelt; ++i)
49781 unsigned e = dcopy.perm[i];
49782 if (GET_MODE_SIZE (d->vmode) == 32
49784 && (e & (nelt / 2 - 1)) < min)
49785 e = e - min - (nelt / 2);
49792 dcopy.one_operand_p = true;
49794 if (single_insn_only_p && !in_order)
49797 /* For AVX2, test whether we can permute the result in one instruction. */
49802 dcopy.op1 = dcopy.op0;
49803 return expand_vec_perm_1 (&dcopy);
/* Emit the actual palignr, shifting by MIN elements.  */
49806 shift = GEN_INT (min * GET_MODE_UNIT_BITSIZE (d->vmode));
49807 if (GET_MODE_SIZE (d->vmode) == 16)
49809 target = gen_reg_rtx (TImode);
49810 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
49811 gen_lowpart (TImode, dcopy.op0), shift));
49815 target = gen_reg_rtx (V2TImode);
49816 emit_insn (gen_avx2_palignrv2ti (target,
49817 gen_lowpart (V2TImode, dcopy.op1),
49818 gen_lowpart (V2TImode, dcopy.op0),
49822 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
49824 /* Test for the degenerate case where the alignment by itself
49825 produces the desired permutation. */
49828 emit_move_insn (d->target, dcopy.op0);
/* Otherwise finish with a single-operand permutation of the shifted
   vector; must succeed for 16-byte modes given SSSE3 pshufb.  */
49832 ok = expand_vec_perm_1 (&dcopy);
49833 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
49838 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
49839 the permutation using the SSE4_1 pblendv instruction. Potentially
49840 reduces permutation from 2 pshufb and or to 1 pshufb and pblendv. */
/* NOTE(review): elided extract; code lines are kept byte-identical.  */
49843 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
49845 unsigned i, which, nelt = d->nelt;
49846 struct expand_vec_perm_d dcopy, dcopy1;
49847 machine_mode vmode = d->vmode;
49850 /* Use the same checks as in expand_vec_perm_blend. */
49851 if (d->one_operand_p)
49853 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
49855 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
49857 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
49862 /* Figure out where permutation elements stay not in their
49863 respective lanes. */
/* WHICH accumulates bit 1 if a displaced element comes from op0
   (index < nelt) and bit 2 if from op1.  */
49864 for (i = 0, which = 0; i < nelt; ++i)
49866 unsigned e = d->perm[i];
49868 which |= (e < nelt ? 1 : 2);
49870 /* We can pblend the part where elements stay not in their
49871 respective lanes only when these elements are all in one
49872 half of a permutation.
49873 {0 1 8 3 4 5 9 7} is ok as 8, 9 are at not at their respective
49874 lanes, but both 8 and 9 >= 8
49875 {0 1 8 3 4 5 2 7} is not ok as 2 and 8 are not at their
49876 respective lanes and 8 >= 8, but 2 not. */
49877 if (which != 1 && which != 2)
49879 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
49882 /* First we apply one operand permutation to the part where
49883 elements stay not in their respective lanes. */
49886 dcopy.op0 = dcopy.op1 = d->op1;
49888 dcopy.op0 = dcopy.op1 = d->op0;
49890 dcopy.target = gen_reg_rtx (vmode);
49891 dcopy.one_operand_p = true;
/* Mask off the operand-select bit; dcopy is a one-operand shuffle.  */
49893 for (i = 0; i < nelt; ++i)
49894 dcopy.perm[i] = d->perm[i] & (nelt - 1);
49896 ok = expand_vec_perm_1 (&dcopy);
49897 if (GET_MODE_SIZE (vmode) != 16 && !ok)
49904 /* Next we put permuted elements into their positions. */
49907 dcopy1.op1 = dcopy.target;
49909 dcopy1.op0 = dcopy.target;
/* Blend mask: take the shuffled vector where the original index was
   cross-operand, the untouched operand elsewhere.  */
49911 for (i = 0; i < nelt; ++i)
49912 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
49914 ok = expand_vec_perm_blend (&dcopy1);
49920 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
49922 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
49923 a two vector permutation into a single vector permutation by using
49924 an interleave operation to merge the vectors. */
/* NOTE(review): elided extract; code lines are kept byte-identical.  */
49927 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
49929 struct expand_vec_perm_d dremap, dfinal;
49930 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
49931 unsigned HOST_WIDE_INT contents;
49932 unsigned char remap[2 * MAX_VECT_LEN];
49934 bool ok, same_halves = false;
49936 if (GET_MODE_SIZE (d->vmode) == 16)
49938 if (d->one_operand_p)
49941 else if (GET_MODE_SIZE (d->vmode) == 32)
49945 /* For 32-byte modes allow even d->one_operand_p.
49946 The lack of cross-lane shuffling in some instructions
49947 might prevent a single insn shuffle. */
49949 dfinal.testing_p = true;
49950 /* If expand_vec_perm_interleave3 can expand this into
49951 a 3 insn sequence, give up and let it be expanded as
49952 3 insn sequence. While that is one insn longer,
49953 it doesn't need a memory operand and in the common
49954 case that both interleave low and high permutations
49955 with the same operands are adjacent needs 4 insns
49956 for both after CSE. */
49957 if (expand_vec_perm_interleave3 (&dfinal))
49963 /* Examine from whence the elements come. */
/* CONTENTS is a bitmask of which source indices (0 .. 2*nelt-1) the
   permutation actually uses.  */
49965 for (i = 0; i < nelt; ++i)
49966 contents |= HOST_WIDE_INT_1U << d->perm[i];
49968 memset (remap, 0xff, sizeof (remap));
49971 if (GET_MODE_SIZE (d->vmode) == 16)
49973 unsigned HOST_WIDE_INT h1, h2, h3, h4;
49975 /* Split the two input vectors into 4 halves. */
49976 h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
49981 /* If the elements from the low halves use interleave low, and similarly
49982 for interleave high. If the elements are from mis-matched halves, we
49983 can use shufps for V4SF/V4SI or do a DImode shuffle. */
49984 if ((contents & (h1 | h3)) == contents)
/* punpckl*: interleave the two low halves.  */
49987 for (i = 0; i < nelt2; ++i)
49990 remap[i + nelt] = i * 2 + 1;
49991 dremap.perm[i * 2] = i;
49992 dremap.perm[i * 2 + 1] = i + nelt;
49994 if (!TARGET_SSE2 && d->vmode == V4SImode)
49995 dremap.vmode = V4SFmode;
49997 else if ((contents & (h2 | h4)) == contents)
/* punpckh*: interleave the two high halves.  */
50000 for (i = 0; i < nelt2; ++i)
50002 remap[i + nelt2] = i * 2;
50003 remap[i + nelt + nelt2] = i * 2 + 1;
50004 dremap.perm[i * 2] = i + nelt2;
50005 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
50007 if (!TARGET_SSE2 && d->vmode == V4SImode)
50008 dremap.vmode = V4SFmode;
50010 else if ((contents & (h1 | h4)) == contents)
/* Mis-matched halves low/high: shufps-style or DImode shuffle.  */
50013 for (i = 0; i < nelt2; ++i)
50016 remap[i + nelt + nelt2] = i + nelt2;
50017 dremap.perm[i] = i;
50018 dremap.perm[i + nelt2] = i + nelt + nelt2;
50023 dremap.vmode = V2DImode;
50025 dremap.perm[0] = 0;
50026 dremap.perm[1] = 3;
50029 else if ((contents & (h2 | h3)) == contents)
/* Mis-matched halves high/low.  */
50032 for (i = 0; i < nelt2; ++i)
50034 remap[i + nelt2] = i;
50035 remap[i + nelt] = i + nelt2;
50036 dremap.perm[i] = i + nelt2;
50037 dremap.perm[i + nelt2] = i + nelt;
50042 dremap.vmode = V2DImode;
50044 dremap.perm[0] = 1;
50045 dremap.perm[1] = 2;
/* 32-byte modes: work on quarters instead of halves.  */
50053 unsigned int nelt4 = nelt / 4, nzcnt = 0;
50054 unsigned HOST_WIDE_INT q[8];
50055 unsigned int nonzero_halves[4];
50057 /* Split the two input vectors into 8 quarters. */
50058 q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
50059 for (i = 1; i < 8; ++i)
50060 q[i] = q[0] << (nelt4 * i);
50061 for (i = 0; i < 4; ++i)
50062 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
50064 nonzero_halves[nzcnt] = i;
50070 gcc_assert (d->one_operand_p);
50071 nonzero_halves[1] = nonzero_halves[0];
50072 same_halves = true;
50074 else if (d->one_operand_p)
50076 gcc_assert (nonzero_halves[0] == 0);
50077 gcc_assert (nonzero_halves[1] == 1);
50082 if (d->perm[0] / nelt2 == nonzero_halves[1])
50084 /* Attempt to increase the likelihood that dfinal
50085 shuffle will be intra-lane. */
50086 std::swap (nonzero_halves[0], nonzero_halves[1]);
50089 /* vperm2f128 or vperm2i128. */
50090 for (i = 0; i < nelt2; ++i)
50092 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
50093 remap[i + nonzero_halves[0] * nelt2] = i;
50094 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
50095 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
50098 if (d->vmode != V8SFmode
50099 && d->vmode != V4DFmode
50100 && d->vmode != V8SImode)
/* vperm2[fi]128 patterns exist only for a few modes; do the lane
   shuffle in V8SImode and convert back afterwards.  */
50102 dremap.vmode = V8SImode;
50104 for (i = 0; i < 4; ++i)
50106 dremap.perm[i] = i + nonzero_halves[0] * 4;
50107 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
50111 else if (d->one_operand_p)
50113 else if (TARGET_AVX2
50114 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
/* vpunpckl* within each 128-bit lane.  */
50117 for (i = 0; i < nelt4; ++i)
50120 remap[i + nelt] = i * 2 + 1;
50121 remap[i + nelt2] = i * 2 + nelt2;
50122 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
50123 dremap.perm[i * 2] = i;
50124 dremap.perm[i * 2 + 1] = i + nelt;
50125 dremap.perm[i * 2 + nelt2] = i + nelt2;
50126 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
50129 else if (TARGET_AVX2
50130 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
/* vpunpckh* within each 128-bit lane.  */
50133 for (i = 0; i < nelt4; ++i)
50135 remap[i + nelt4] = i * 2;
50136 remap[i + nelt + nelt4] = i * 2 + 1;
50137 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
50138 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
50139 dremap.perm[i * 2] = i + nelt4;
50140 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
50141 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
50142 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
50149 /* Use the remapping array set up above to move the elements from their
50150 swizzled locations into their final destinations. */
50152 for (i = 0; i < nelt; ++i)
50154 unsigned e = remap[d->perm[i]];
50155 gcc_assert (e < nelt);
50156 /* If same_halves is true, both halves of the remapped vector are the
50157 same. Avoid cross-lane accesses if possible. */
50158 if (same_halves && i >= nelt2)
50160 gcc_assert (e < nelt2);
50161 dfinal.perm[i] = e + nelt2;
50164 dfinal.perm[i] = e;
50168 dremap.target = gen_reg_rtx (dremap.vmode);
50169 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
50171 dfinal.op1 = dfinal.op0;
50172 dfinal.one_operand_p = true;
50174 /* Test if the final remap can be done with a single insn. For V4SFmode or
50175 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
50177 ok = expand_vec_perm_1 (&dfinal);
50178 seq = get_insns ();
50187 if (dremap.vmode != dfinal.vmode)
50189 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
50190 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
50193 ok = expand_vec_perm_1 (&dremap);
50200 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
50201 a single vector cross-lane permutation into vpermq followed
50202 by any of the single insn permutations. */
/* NOTE(review): elided extract; code lines are kept byte-identical.  */
50205 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
50207 struct expand_vec_perm_d dremap, dfinal;
50208 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
50209 unsigned contents[2];
50213 && (d->vmode == V32QImode || d->vmode == V16HImode)
50214 && d->one_operand_p))
/* contents[k] is a 4-bit mask of which source quarters the elements of
   destination half K are drawn from.  */
50219 for (i = 0; i < nelt2; ++i)
50221 contents[0] |= 1u << (d->perm[i] / nelt4);
50222 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
/* Each destination half may use at most two source quarters, since
   vpermq can place only two quarters into each half.  */
50225 for (i = 0; i < 2; ++i)
50227 unsigned int cnt = 0;
50228 for (j = 0; j < 4; ++j)
50229 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
/* Build the vpermq (V4DImode) remap moving the needed quarters into
   the right halves.  */
50237 dremap.vmode = V4DImode;
50239 dremap.target = gen_reg_rtx (V4DImode);
50240 dremap.op0 = gen_lowpart (V4DImode, d->op0);
50241 dremap.op1 = dremap.op0;
50242 dremap.one_operand_p = true;
50243 for (i = 0; i < 2; ++i)
50245 unsigned int cnt = 0;
50246 for (j = 0; j < 4; ++j)
50247 if ((contents[i] & (1u << j)) != 0)
50248 dremap.perm[2 * i + cnt++] = j;
50249 for (; cnt < 2; ++cnt)
50250 dremap.perm[2 * i + cnt] = 0;
/* Build the follow-up single-insn permutation in terms of the
   remapped quarter positions.  */
50254 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
50255 dfinal.op1 = dfinal.op0;
50256 dfinal.one_operand_p = true;
50257 for (i = 0, j = 0; i < nelt; ++i)
50261 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
50262 if ((d->perm[i] / nelt4) == dremap.perm[j])
50264 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
50265 dfinal.perm[i] |= nelt4;
50267 gcc_unreachable ();
50270 ok = expand_vec_perm_1 (&dremap);
50273 ok = expand_vec_perm_1 (&dfinal);
50279 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
50280 a vector permutation using two instructions, vperm2f128 resp.
50281 vperm2i128 followed by any single in-lane permutation. */
/* NOTE(review): elided extract; code lines are kept byte-identical.  */
50284 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
50286 struct expand_vec_perm_d dfirst, dsecond;
50287 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
50291 || GET_MODE_SIZE (d->vmode) != 32
50292 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
50296 dsecond.one_operand_p = false;
50297 dsecond.testing_p = true;
50299 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
50300 immediate. For perm < 16 the second permutation uses
50301 d->op0 as first operand, for perm >= 16 it uses d->op1
50302 as first operand. The second operand is the result of
50304 for (perm = 0; perm < 32; perm++)
50306 /* Ignore permutations which do not move anything cross-lane. */
50309 /* The second shuffle for e.g. V4DFmode has
50310 0123 and ABCD operands.
50311 Ignore AB23, as 23 is already in the second lane
50312 of the first operand. */
50313 if ((perm & 0xc) == (1 << 2)) continue;
50314 /* And 01CD, as 01 is in the first lane of the first
50316 if ((perm & 3) == 0) continue;
50317 /* And 4567, as then the vperm2[fi]128 doesn't change
50318 anything on the original 4567 second operand. */
50319 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
50323 /* The second shuffle for e.g. V4DFmode has
50324 4567 and ABCD operands.
50325 Ignore AB67, as 67 is already in the second lane
50326 of the first operand. */
50327 if ((perm & 0xc) == (3 << 2)) continue;
50328 /* And 45CD, as 45 is in the first lane of the first
50330 if ((perm & 3) == 2) continue;
50331 /* And 0123, as then the vperm2[fi]128 doesn't change
50332 anything on the original 0123 first operand. */
50333 if ((perm & 0xf) == (1 << 2)) continue;
/* Derive the second-stage permutation for this candidate lane
   selection; give up on this PERM if an element fits neither the
   vperm2f128 result nor the untouched operand.  */
50336 for (i = 0; i < nelt; i++)
50338 j = d->perm[i] / nelt2;
50339 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
50340 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
50341 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
50342 dsecond.perm[i] = d->perm[i] & (nelt - 1);
50350 ok = expand_vec_perm_1 (&dsecond);
50361 /* Found a usable second shuffle. dfirst will be
50362 vperm2f128 on d->op0 and d->op1. */
50363 dsecond.testing_p = false;
50365 dfirst.target = gen_reg_rtx (d->vmode);
50366 for (i = 0; i < nelt; i++)
50367 dfirst.perm[i] = (i & (nelt2 - 1))
50368 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
50370 canonicalize_perm (&dfirst);
50371 ok = expand_vec_perm_1 (&dfirst);
50374 /* And dsecond is some single insn shuffle, taking
50375 d->op0 and result of vperm2f128 (if perm < 16) or
50376 d->op1 and result of vperm2f128 (otherwise). */
50378 dsecond.op0 = dsecond.op1;
50379 dsecond.op1 = dfirst.target;
50381 ok = expand_vec_perm_1 (&dsecond);
50387 /* For one operand, the only useful vperm2f128 permutation is 0x01
50389 if (d->one_operand_p)
50396 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
50397 a two vector permutation using 2 intra-lane interleave insns
50398 and cross-lane shuffle for 32-byte vectors. */
/* NOTE(review): elided extract; code lines are kept byte-identical.  */
50401 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
50404 rtx (*gen) (rtx, rtx, rtx);
50406 if (d->one_operand_p)
50408 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
50410 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
/* The permutation must be exactly interleave-low (perm[0] == 0) or
   interleave-high (perm[0] == nelt/2) of the two operands.  */
50416 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
50418 for (i = 0; i < nelt; i += 2)
50419 if (d->perm[i] != d->perm[0] + i / 2
50420 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
/* Select the interleave expander matching the mode and the low/high
   variant requested.  */
50430 gen = gen_vec_interleave_highv32qi;
50432 gen = gen_vec_interleave_lowv32qi;
50436 gen = gen_vec_interleave_highv16hi;
50438 gen = gen_vec_interleave_lowv16hi;
50442 gen = gen_vec_interleave_highv8si;
50444 gen = gen_vec_interleave_lowv8si;
50448 gen = gen_vec_interleave_highv4di;
50450 gen = gen_vec_interleave_lowv4di;
50454 gen = gen_vec_interleave_highv8sf;
50456 gen = gen_vec_interleave_lowv8sf;
50460 gen = gen_vec_interleave_highv4df;
50462 gen = gen_vec_interleave_lowv4df;
50465 gcc_unreachable ();
50468 emit_insn (gen (d->target, d->op0, d->op1));
50472 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
50473 a single vector permutation using a single intra-lane vector
50474 permutation, vperm2f128 swapping the lanes and vblend* insn blending
50475 the non-swapped and swapped vectors together. */
/* NOTE(review): elided extract; code lines are kept byte-identical.  */
50478 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
50480 struct expand_vec_perm_d dfirst, dsecond;
50481 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
50484 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
50488 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
50489 || !d->one_operand_p
/* Build the intra-lane permutation: 0xff marks an as-yet-unassigned
   slot; MSK collects the per-element blend selector bits.  */
50493 for (i = 0; i < nelt; i++)
50494 dfirst.perm[i] = 0xff;
50495 for (i = 0, msk = 0; i < nelt; i++)
50497 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
50498 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
50500 dfirst.perm[j] = d->perm[i];
50504 for (i = 0; i < nelt; i++)
50505 if (dfirst.perm[i] == 0xff)
50506 dfirst.perm[i] = i;
50509 dfirst.target = gen_reg_rtx (dfirst.vmode);
50512 ok = expand_vec_perm_1 (&dfirst);
50513 seq = get_insns ();
/* Second stage: lane swap (i ^ nelt2) of the first-stage result.  */
50525 dsecond.op0 = dfirst.target;
50526 dsecond.op1 = dfirst.target;
50527 dsecond.one_operand_p = true;
50528 dsecond.target = gen_reg_rtx (dsecond.vmode);
50529 for (i = 0; i < nelt; i++)
50530 dsecond.perm[i] = i ^ nelt2;
50532 ok = expand_vec_perm_1 (&dsecond);
/* Blend the non-swapped and swapped vectors under mask MSK.  */
50535 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
50536 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
50540 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
50541 permutation using two vperm2f128, followed by a vshufpd insn blending
50542 the two vectors together. */
/* NOTE(review): elided extract; code lines are kept byte-identical.  */
50545 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
50547 struct expand_vec_perm_d dfirst, dsecond, dthird;
/* Only implemented for AVX V4DFmode.  */
50550 if (!TARGET_AVX || (d->vmode != V4DFmode))
/* dfirst gathers the even-aligned pairs containing perm[0]/perm[2],
   dsecond those containing perm[1]/perm[3]; dthird is the final
   vshufpd picking the correct element of each pair.  */
50560 dfirst.perm[0] = (d->perm[0] & ~1);
50561 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
50562 dfirst.perm[2] = (d->perm[2] & ~1);
50563 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
50564 dsecond.perm[0] = (d->perm[1] & ~1);
50565 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
50566 dsecond.perm[2] = (d->perm[3] & ~1);
50567 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
50568 dthird.perm[0] = (d->perm[0] % 2);
50569 dthird.perm[1] = (d->perm[1] % 2) + 4;
50570 dthird.perm[2] = (d->perm[2] % 2) + 2;
50571 dthird.perm[3] = (d->perm[3] % 2) + 6;
50573 dfirst.target = gen_reg_rtx (dfirst.vmode);
50574 dsecond.target = gen_reg_rtx (dsecond.vmode);
50575 dthird.op0 = dfirst.target;
50576 dthird.op1 = dsecond.target;
50577 dthird.one_operand_p = false;
50579 canonicalize_perm (&dfirst);
50580 canonicalize_perm (&dsecond);
/* All three sub-permutations must expand for the whole to succeed.  */
50582 ok = expand_vec_perm_1 (&dfirst)
50583 && expand_vec_perm_1 (&dsecond)
50584 && expand_vec_perm_1 (&dthird);
50591 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
50592 permutation with two pshufb insns and an ior. We should have already
50593 failed all two instruction sequences. */
/* NOTE(review): elided extract; code lines are kept byte-identical.  */
50596 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
50598 rtx rperm[2][16], vperm, l, h, op, m128;
50599 unsigned int i, nelt, eltsz;
50601 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
50603 gcc_assert (!d->one_operand_p);
50609 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
50611 /* Generate two permutation masks. If the required element is within
50612 the given vector it is shuffled into the proper lane. If the required
50613 element is in the other vector, force a zero into the lane by setting
50614 bit 7 in the permutation mask. */
50615 m128 = GEN_INT (-128);
50616 for (i = 0; i < nelt; ++i)
50618 unsigned j, e = d->perm[i];
50619 unsigned which = (e >= nelt);
/* Expand the element index to its ELTSZ constituent bytes.  */
50623 for (j = 0; j < eltsz; ++j)
50625 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
50626 rperm[1-which][i*eltsz + j] = m128;
/* pshufb of op0 with mask 0 -> L (zeros where op1 supplies bytes).  */
50630 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
50631 vperm = force_reg (V16QImode, vperm);
50633 l = gen_reg_rtx (V16QImode);
50634 op = gen_lowpart (V16QImode, d->op0);
50635 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
/* pshufb of op1 with mask 1 -> H (zeros where op0 supplies bytes).  */
50637 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
50638 vperm = force_reg (V16QImode, vperm);
50640 h = gen_reg_rtx (V16QImode);
50641 op = gen_lowpart (V16QImode, d->op1);
50642 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
/* OR the two halves; move into the target in its original mode.  */
50645 if (d->vmode != V16QImode)
50646 op = gen_reg_rtx (V16QImode);
50647 emit_insn (gen_iorv16qi3 (op, l, h));
50648 if (op != d->target)
50649 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
50654 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
50655 with two vpshufb insns, vpermq and vpor. We should have already failed
50656 all two or three instruction sequences. */
/* NOTE(review): elided extract; code lines are kept byte-identical.  */
50659 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
50661 rtx rperm[2][32], vperm, l, h, hp, op, m128;
50662 unsigned int i, nelt, eltsz;
50665 || !d->one_operand_p
50666 || (d->vmode != V32QImode && d->vmode != V16HImode))
50673 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
50675 /* Generate two permutation masks. If the required element is within
50676 the same lane, it is shuffled in. If the required element from the
50677 other lane, force a zero by setting bit 7 in the permutation mask.
50678 In the other mask the mask has non-negative elements if element
50679 is requested from the other lane, but also moved to the other lane,
50680 so that the result of vpshufb can have the two V2TImode halves
50682 m128 = GEN_INT (-128);
50683 for (i = 0; i < nelt; ++i)
50685 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
50686 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
50688 for (j = 0; j < eltsz; ++j)
50690 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
50691 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
/* vpshufb with the cross-lane mask -> H.  */
50695 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
50696 vperm = force_reg (V32QImode, vperm);
50698 h = gen_reg_rtx (V32QImode);
50699 op = gen_lowpart (V32QImode, d->op0);
50700 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
50702 /* Swap the 128-byte lanes of h into hp. */
50703 hp = gen_reg_rtx (V4DImode);
50704 op = gen_lowpart (V4DImode, h);
50705 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
/* vpshufb with the intra-lane mask -> L.  */
50708 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
50709 vperm = force_reg (V32QImode, vperm);
50711 l = gen_reg_rtx (V32QImode);
50712 op = gen_lowpart (V32QImode, d->op0);
50713 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
/* OR intra-lane and lane-swapped results; write back in D's mode.  */
50716 if (d->vmode != V32QImode)
50717 op = gen_reg_rtx (V32QImode);
50718 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
50719 if (op != d->target)
50720 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
50725 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
50726 and extract-odd permutations of two V32QImode and V16QImode operand
50727 with two vpshufb insns, vpor and vpermq. We should have already
50728 failed all two or three instruction sequences. */
/* NOTE(review): this chunk is line-sampled -- the embedded numbering skips
   values, so guard clauses, braces and the final return are missing from
   view.  Comments below describe only what is visible.  */
50731 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
50733 rtx rperm[2][32], vperm, l, h, ior, op, m128;
50734 unsigned int i, nelt, eltsz;
/* Reject one-operand cases and any mode other than V32QI/V16HI
   (presumably returns false here -- elided lines; TODO confirm).  */
50737 || d->one_operand_p
50738 || (d->vmode != V32QImode && d->vmode != V16HImode)
/* Verify the permutation really is an even/odd extraction of the
   concatenated operands.  */
50741 for (i = 0; i < d->nelt; ++i)
50742 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
50749 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
50751 /* Generate two permutation masks. In the first permutation mask
50752 the first quarter will contain indexes for the first half
50753 of the op0, the second quarter will contain bit 7 set, third quarter
50754 will contain indexes for the second half of the op0 and the
50755 last quarter bit 7 set. In the second permutation mask
50756 the first quarter will contain bit 7 set, the second quarter
50757 indexes for the first half of the op1, the third quarter bit 7 set
50758 and last quarter indexes for the second half of the op1.
50759 I.e. the first mask e.g. for V32QImode extract even will be:
50760 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
50761 (all values masked with 0xf except for -128) and second mask
50762 for extract even will be
50763 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
50764 m128 = GEN_INT (-128);
50765 for (i = 0; i < nelt; ++i)
50767 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
50768 unsigned which = d->perm[i] >= nelt;
50769 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
50771 for (j = 0; j < eltsz; ++j)
/* Fill the byte index into the chosen mask and -128 (zeroing bit 7
   set) into the other mask at the same position.  */
50773 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
50774 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
/* First vpshufb: shuffle op0 bytes according to mask 0.  */
50778 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
50779 vperm = force_reg (V32QImode, vperm);
50781 l = gen_reg_rtx (V32QImode);
50782 op = gen_lowpart (V32QImode, d->op0);
50783 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
/* Second vpshufb: shuffle op1 bytes according to mask 1.  */
50785 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
50786 vperm = force_reg (V32QImode, vperm);
50788 h = gen_reg_rtx (V32QImode);
50789 op = gen_lowpart (V32QImode, d->op1);
50790 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
/* Combine the two shuffles -- the -128 entries zeroed the bytes the
   other half supplies, so an IOR merges them.  */
50792 ior = gen_reg_rtx (V32QImode);
50793 emit_insn (gen_iorv32qi3 (ior, l, h));
50795 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
50796 op = gen_reg_rtx (V4DImode);
50797 ior = gen_lowpart (V4DImode, ior);
50798 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
50799 const1_rtx, GEN_INT (3)));
50800 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
50805 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
50806 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
50807 with two "and" and "pack" or two "shift" and "pack" insns. We should
50808 have already failed all two instruction sequences. */
/* NOTE(review): line-sampled chunk -- switch labels, braces and returns
   between the visible statements are elided.  */
50811 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
50813 rtx op, dop0, dop1, t, rperm[16];
50814 unsigned i, odd, c, s, nelt = d->nelt;
50815 bool end_perm = false;
50816 machine_mode half_mode;
/* Insn generators selected per input mode below.  */
50817 rtx (*gen_and) (rtx, rtx, rtx);
50818 rtx (*gen_pack) (rtx, rtx, rtx);
50819 rtx (*gen_shift) (rtx, rtx, rtx);
50821 if (d->one_operand_p)
50827 /* Required for "pack". */
50828 if (!TARGET_SSE4_1)
/* V8HI case (elided label): pack word pairs with packusdw.  */
50832 half_mode = V4SImode;
50833 gen_and = gen_andv4si3;
50834 gen_pack = gen_sse4_1_packusdw;
50835 gen_shift = gen_lshrv4si3;
50838 /* No check as all instructions are SSE2. */
/* V16QI case (elided label): pack byte pairs with packuswb.  */
50841 half_mode = V8HImode;
50842 gen_and = gen_andv8hi3;
50843 gen_pack = gen_sse2_packuswb;
50844 gen_shift = gen_lshrv8hi3;
/* V16HI case (elided label): AVX2 variants; needs the final vpermq.  */
50851 half_mode = V8SImode;
50852 gen_and = gen_andv8si3;
50853 gen_pack = gen_avx2_packusdw;
50854 gen_shift = gen_lshrv8si3;
/* V32QI case (elided label).  */
50862 half_mode = V16HImode;
50863 gen_and = gen_andv16hi3;
50864 gen_pack = gen_avx2_packuswb;
50865 gen_shift = gen_lshrv16hi3;
50869 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
50870 general shuffles. */
50874 /* Check that permutation is even or odd. */
50879 for (i = 1; i < nelt; ++i)
50880 if (d->perm[i] != 2 * i + odd)
50886 dop0 = gen_reg_rtx (half_mode);
50887 dop1 = gen_reg_rtx (half_mode);
/* Even extraction (presumably -- branch header elided): mask the low
   element of each wide pair with constant C, then pack.  */
50890 for (i = 0; i < nelt / 2; i++)
50891 rperm[i] = GEN_INT (c);
50892 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
50893 t = force_reg (half_mode, t);
50894 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
50895 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
/* Odd extraction (presumably): shift the high element down, then pack.  */
50899 emit_insn (gen_shift (dop0,
50900 gen_lowpart (half_mode, d->op0),
50902 emit_insn (gen_shift (dop1,
50903 gen_lowpart (half_mode, d->op1),
50906 /* In AVX2 for 256 bit case we need to permute pack result. */
50907 if (TARGET_AVX2 && end_perm)
50909 op = gen_reg_rtx (d->vmode);
50910 t = gen_reg_rtx (V4DImode);
50911 emit_insn (gen_pack (op, dop0, dop1));
/* vpermq fixes up the lane-local pack result (permutation operands
   elided from view).  */
50912 emit_insn (gen_avx2_permv4di_1 (t,
50913 gen_lowpart (V4DImode, op),
50918 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
/* 128-bit case: the pack alone produces the result.  */
50921 emit_insn (gen_pack (d->target, dop0, dop1));
50926 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
50927 and extract-odd permutations of two V64QI operands
50928 with two "shifts", two "truncs" and one "concat" insns for "odd"
50929 and two "truncs" and one concat insn for "even."
50930 Have already failed all two instruction sequences. */
/* NOTE(review): line-sampled chunk -- guard branches/returns elided.  */
50933 expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d)
50935 rtx t1, t2, t3, t4;
50936 unsigned i, odd, nelt = d->nelt;
/* Only applicable to two-operand V64QImode permutations on AVX512BW.  */
50938 if (!TARGET_AVX512BW
50939 || d->one_operand_p
50940 || d->vmode != V64QImode)
50943 /* Check that permutation is even or odd. */
50948 for (i = 1; i < nelt; ++i)
50949 if (d->perm[i] != 2 * i + odd)
/* Odd extraction (presumably -- branch header elided): shift each word
   right so the odd byte lands in the low byte before truncating.  */
50958 t1 = gen_reg_rtx (V32HImode);
50959 t2 = gen_reg_rtx (V32HImode);
50960 emit_insn (gen_lshrv32hi3 (t1,
50961 gen_lowpart (V32HImode, d->op0),
50963 emit_insn (gen_lshrv32hi3 (t2,
50964 gen_lowpart (V32HImode, d->op1),
/* Even extraction: truncation alone keeps the even bytes.  */
50969 t1 = gen_lowpart (V32HImode, d->op0);
50970 t2 = gen_lowpart (V32HImode, d->op1);
/* Truncate both halves V32HI -> V32QI and concatenate into V64QI.  */
50973 t3 = gen_reg_rtx (V32QImode);
50974 t4 = gen_reg_rtx (V32QImode);
50975 emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1));
50976 emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2));
50977 emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4));
50982 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
50983 and extract-odd permutations. */
/* NOTE(review): line-sampled chunk -- the mode switch labels, testing_p
   early-outs, braces and returns are elided; each visible group below is
   one case of a switch on d->vmode (TODO confirm against full source).  */
50986 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
50988 rtx t1, t2, t3, t4, t5;
/* V4DF case: two vperm2f128 + one unpck[lh]pd.  */
50995 t1 = gen_reg_rtx (V4DFmode);
50996 t2 = gen_reg_rtx (V4DFmode);
50998 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
50999 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
51000 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
51002 /* Now an unpck[lh]pd will produce the result required. */
51004 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
51006 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
/* V8SF case: shufps/vperm2f128 dance.  */
51012 int mask = odd ? 0xdd : 0x88;
51016 t1 = gen_reg_rtx (V8SFmode);
51017 t2 = gen_reg_rtx (V8SFmode);
51018 t3 = gen_reg_rtx (V8SFmode);
51020 /* Shuffle within the 128-bit lanes to produce:
51021 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
51022 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
51025 /* Shuffle the lanes around to produce:
51026 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
51027 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
51030 /* Shuffle within the 128-bit lanes to produce:
51031 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
51032 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
51034 /* Shuffle within the 128-bit lanes to produce:
51035 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
51036 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
51038 /* Shuffle the lanes around to produce:
51039 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
51040 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
51049 /* These are always directly implementable by expand_vec_perm_1. */
51050 gcc_unreachable ();
/* V8HI case (presumably): pack, pshufb2 or interleave fallbacks.  */
51054 return expand_vec_perm_even_odd_pack (d);
51055 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
51056 return expand_vec_perm_pshufb2 (d);
51061 /* We need 2*log2(N)-1 operations to achieve odd/even
51062 with interleave. */
51063 t1 = gen_reg_rtx (V8HImode);
51064 t2 = gen_reg_rtx (V8HImode);
51065 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
51066 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
51067 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
51068 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
51070 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
51072 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
/* V16QI / 256-bit QI/HI cases fall back on the pack subroutine.  */
51078 return expand_vec_perm_even_odd_pack (d);
51082 return expand_vec_perm_even_odd_pack (d);
/* V64QI case.  */
51085 return expand_vec_perm_even_odd_trunc (d);
/* V4DI case: recurse through the equivalent V4DF permutation.  */
51090 struct expand_vec_perm_d d_copy = *d;
51091 d_copy.vmode = V4DFmode;
51093 d_copy.target = gen_raw_REG (V4DFmode, LAST_VIRTUAL_REGISTER + 1);
51095 d_copy.target = gen_reg_rtx (V4DFmode);
51096 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
51097 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
51098 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
51101 emit_move_insn (d->target,
51102 gen_lowpart (V4DImode, d_copy.target));
/* Integer V4DI fallback: vperm2ti + punpck.  */
51111 t1 = gen_reg_rtx (V4DImode);
51112 t2 = gen_reg_rtx (V4DImode);
51114 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
51115 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
51116 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
51118 /* Now an vpunpck[lh]qdq will produce the result required. */
51120 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
51122 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
/* V8SI case: recurse through the equivalent V8SF permutation.  */
51129 struct expand_vec_perm_d d_copy = *d;
51130 d_copy.vmode = V8SFmode;
51132 d_copy.target = gen_raw_REG (V8SFmode, LAST_VIRTUAL_REGISTER + 1);
51134 d_copy.target = gen_reg_rtx (V8SFmode);
51135 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
51136 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
51137 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
51140 emit_move_insn (d->target,
51141 gen_lowpart (V8SImode, d_copy.target));
/* Integer V8SI fallback.  */
51150 t1 = gen_reg_rtx (V8SImode);
51151 t2 = gen_reg_rtx (V8SImode);
51152 t3 = gen_reg_rtx (V4DImode);
51153 t4 = gen_reg_rtx (V4DImode);
51154 t5 = gen_reg_rtx (V4DImode);
51156 /* Shuffle the lanes around into
51157 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
51158 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
51159 gen_lowpart (V4DImode, d->op1),
51161 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
51162 gen_lowpart (V4DImode, d->op1),
51165 /* Swap the 2nd and 3rd position in each lane into
51166 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
51167 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
51168 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
51169 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
51170 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
51172 /* Now an vpunpck[lh]qdq will produce
51173 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
51175 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
51176 gen_lowpart (V4DImode, t2));
51178 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
51179 gen_lowpart (V4DImode, t2));
51181 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
51185 gcc_unreachable ();
51191 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
51192 extract-even and extract-odd permutations. */
/* NOTE(review): line-sampled -- the statement computing ODD from
   d->perm[0] and the early returns are elided.  */
51195 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
51197 unsigned i, odd, nelt = d->nelt;
51200 if (odd != 0 && odd != 1)
/* Every element must be 2*i+odd for an even/odd extraction.  */
51203 for (i = 1; i < nelt; ++i)
51204 if (d->perm[i] != 2 * i + odd)
51207 return expand_vec_perm_even_odd_1 (d, odd);
51210 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
51211 permutations. We assume that expand_vec_perm_1 has already failed. */
/* NOTE(review): line-sampled -- the switch on vmode, its labels and the
   return statements are elided from view.  */
51214 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
51216 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
51217 machine_mode vmode = d->vmode;
51218 unsigned char perm2[4];
51219 rtx op0 = d->op0, dest;
51226 /* These are special-cased in sse.md so that we can optionally
51227 use the vbroadcast instruction. They expand to two insns
51228 if the input happens to be in a register. */
51229 gcc_unreachable ();
51235 /* These are always implementable using standard shuffle patterns. */
51236 gcc_unreachable ();
51240 /* These can be implemented via interleave. We save one insn by
51241 stopping once we have promoted to V4SImode and then use pshufd. */
/* Pick low/high interleave based on which half ELT lives in
   (condition elided); widen one step per iteration until V4SI.  */
51247 rtx (*gen) (rtx, rtx, rtx)
51248 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
51249 : gen_vec_interleave_lowv8hi;
51253 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
51254 : gen_vec_interleave_highv8hi;
51259 dest = gen_reg_rtx (vmode);
51260 emit_insn (gen (dest, op0, op0));
51261 vmode = get_mode_wider_vector (vmode);
51262 op0 = gen_lowpart (vmode, dest);
51264 while (vmode != V4SImode);
/* Finish with a pshufd splat of the surviving V4SI element.  */
51266 memset (perm2, elt, 4);
51267 dest = gen_reg_rtx (V4SImode);
51268 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
51271 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
51279 /* For AVX2 broadcasts of the first element vpbroadcast* or
51280 vpermq should be used by expand_vec_perm_1. */
51281 gcc_assert (!TARGET_AVX2 || d->perm[0]);
51285 gcc_unreachable ();
51289 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
51290 broadcast permutations. */
/* NOTE(review): line-sampled -- the `elt = d->perm[0]' assignment and
   the early returns are elided.  */
51293 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
51295 unsigned i, elt, nelt = d->nelt;
51297 if (!d->one_operand_p)
/* A broadcast repeats one source element across the whole vector.  */
51301 for (i = 1; i < nelt; ++i)
51302 if (d->perm[i] != elt)
51305 return expand_vec_perm_broadcast_1 (d);
51308 /* Implement arbitrary permutations of two V64QImode operands
51309 will 2 vpermi2w, 2 vpshufb and one vpor instruction. */
/* NOTE(review): line-sampled -- the testing_p early-out, braces and the
   final return are elided from view.  */
51311 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
51313 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
51319 struct expand_vec_perm_d ds[2];
51320 rtx rperm[128], vperm, target0, target1;
51321 unsigned int i, nelt;
51322 machine_mode vmode;
/* Build two V32HI sub-permutations sharing the same operands.  */
51327 for (i = 0; i < 2; i++)
51330 ds[i].vmode = V32HImode;
51332 ds[i].target = gen_reg_rtx (V32HImode);
51333 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
51334 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
51337 /* Prepare permutations such that the first one takes care of
51338 putting the even bytes into the right positions or one higher
51339 positions (ds[0]) and the second one takes care of
51340 putting the odd bytes into the right positions or one below
51343 for (i = 0; i < nelt; i++)
51345 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
/* rperm[0..63] is the pshufb mask for ds[0]'s result, rperm[64..127]
   for ds[1]'s; -1 marks "don't care" bytes (branch headers elided).  */
51348 rperm[i] = constm1_rtx;
51349 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
51353 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
51354 rperm[i + 64] = constm1_rtx;
/* Expand both word permutations (vpermi2w) first.  */
51358 bool ok = expand_vec_perm_1 (&ds[0]);
51360 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
51362 ok = expand_vec_perm_1 (&ds[1]);
51364 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
/* Byte-shuffle each intermediate with vpshufb, then IOR them.  */
51366 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
51367 vperm = force_reg (vmode, vperm);
51368 target0 = gen_reg_rtx (V64QImode);
51369 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
51371 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
51372 vperm = force_reg (vmode, vperm);
51373 target1 = gen_reg_rtx (V64QImode);
51374 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
51376 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
51380 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
51381 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
51382 all the shorter instruction sequences. */
/* NOTE(review): line-sampled -- guard clauses, the `used' declaration,
   several branch headers and the return are elided from view.  */
51385 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
51387 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
51388 unsigned int i, nelt, eltsz;
51392 || d->one_operand_p
51393 || (d->vmode != V32QImode && d->vmode != V16HImode)
51400 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
51402 /* Generate 4 permutation masks. If the required element is within
51403 the same lane, it is shuffled in. If the required element from the
51404 other lane, force a zero by setting bit 7 in the permutation mask.
51405 In the other mask the mask has non-negative elements if element
51406 is requested from the other lane, but also moved to the other lane,
51407 so that the result of vpshufb can have the two V2TImode halves
51409 m128 = GEN_INT (-128);
/* Initialize all four masks to "zero this byte" (-128 sets bit 7).  */
51410 for (i = 0; i < 32; ++i)
51412 rperm[0][i] = m128;
51413 rperm[1][i] = m128;
51414 rperm[2][i] = m128;
51415 rperm[3][i] = m128;
/* Mask index WHICH: bit 1 = element comes from op1, bit 0 = element
   crosses a 128-bit lane (needs the vpermq-swapped copy).  */
51421 for (i = 0; i < nelt; ++i)
51423 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
51424 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
51425 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
51427 for (j = 0; j < eltsz; ++j)
51428 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
51429 used[which] = true;
/* Emit the cross-lane vpshufb for each operand that needs one.  */
51432 for (i = 0; i < 2; ++i)
51434 if (!used[2 * i + 1])
51439 vperm = gen_rtx_CONST_VECTOR (V32QImode,
51440 gen_rtvec_v (32, rperm[2 * i + 1]));
51441 vperm = force_reg (V32QImode, vperm);
51442 h[i] = gen_reg_rtx (V32QImode);
51443 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
51444 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
51447 /* Swap the 128-byte lanes of h[X]. */
51448 for (i = 0; i < 2; ++i)
51450 if (h[i] == NULL_RTX)
51452 op = gen_reg_rtx (V4DImode);
51453 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
51454 const2_rtx, GEN_INT (3), const0_rtx,
51456 h[i] = gen_lowpart (V32QImode, op);
/* Emit the in-lane vpshufb for each operand that needs one.  */
51459 for (i = 0; i < 2; ++i)
51466 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
51467 vperm = force_reg (V32QImode, vperm);
51468 l[i] = gen_reg_rtx (V32QImode);
51469 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
51470 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
/* Merge in-lane and cross-lane halves per operand with vpor.  */
51473 for (i = 0; i < 2; ++i)
51477 op = gen_reg_rtx (V32QImode);
51478 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
51485 gcc_assert (l[0] && l[1]);
/* Final vpor combines both operands' contributions.  */
51487 if (d->vmode != V32QImode)
51488 op = gen_reg_rtx (V32QImode);
51489 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
51490 if (op != d->target)
51491 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
51495 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
51496 With all of the interface bits taken care of, perform the expansion
51497 in D and return true on success. */
/* NOTE(review): line-sampled -- the `return true;' after each successful
   strategy is elided; each `if' below short-circuits on success,
   ordered from cheapest to most expensive sequence.  */
51500 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
51502 /* Try a single instruction expansion. */
51503 if (expand_vec_perm_1 (d))
51506 /* Try sequences of two instructions. */
51508 if (expand_vec_perm_pshuflw_pshufhw (d))
51511 if (expand_vec_perm_palignr (d, false))
51514 if (expand_vec_perm_interleave2 (d))
51517 if (expand_vec_perm_broadcast (d))
51520 if (expand_vec_perm_vpermq_perm_1 (d))
51523 if (expand_vec_perm_vperm2f128 (d))
51526 if (expand_vec_perm_pblendv (d))
51529 /* Try sequences of three instructions. */
51531 if (expand_vec_perm_even_odd_pack (d))
51534 if (expand_vec_perm_2vperm2f128_vshuf (d))
51537 if (expand_vec_perm_pshufb2 (d))
51540 if (expand_vec_perm_interleave3 (d))
51543 if (expand_vec_perm_vperm2f128_vblend (d))
51546 /* Try sequences of four instructions. */
51548 if (expand_vec_perm_even_odd_trunc (d))
51550 if (expand_vec_perm_vpshufb2_vpermq (d))
51553 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
51556 if (expand_vec_perm_vpermi2_vpshub2 (d))
51559 /* ??? Look for narrow permutations whose element orderings would
51560 allow the promotion to a wider mode. */
51562 /* ??? Look for sequences of interleave or a wider permute that place
51563 the data into the correct lanes for a half-vector shuffle like
51564 pshuf[lh]w or vpermilps. */
51566 /* ??? Look for sequences of interleave that produce the desired results.
51567 The combinatorics of punpck[lh] get pretty ugly... */
51569 if (expand_vec_perm_even_odd (d))
51572 /* Even longer sequences. */
51573 if (expand_vec_perm_vpshufb4_vpermq2 (d))
51576 /* See if we can get the same permutation in different vector integer
/* Last resort: canonicalize to another integer vector mode and retry
   the single-insn expander.  */
51578 struct expand_vec_perm_d nd;
51579 if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
51582 emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
51589 /* If a permutation only uses one operand, make it clear. Returns true
51590 if the permutation references both operands. */
/* NOTE(review): line-sampled -- the branches on WHICH (1, 2 or 3) and
   the op1->op0 copying are partly elided.  */
51593 canonicalize_perm (struct expand_vec_perm_d *d)
51595 int i, which, nelt = d->nelt;
/* WHICH accumulates bit 1 for op0 references, bit 2 for op1.  */
51597 for (i = which = 0; i < nelt; ++i)
51598 which |= (d->perm[i] < nelt ? 1 : 2);
51600 d->one_operand_p = true;
51607 if (!rtx_equal_p (d->op0, d->op1))
51609 d->one_operand_p = false;
51612 /* The elements of PERM do not suggest that only the first operand
51613 is used, but both operands are identical. Allow easier matching
51614 of the permutation by folding the permutation into the single
/* Fold indices of the duplicated second operand onto the first.  */
51619 for (i = 0; i < nelt; ++i)
51620 d->perm[i] &= nelt - 1;
51629 return (which == 3);
/* Expand a constant vector permutation given OPERANDS =
   { target, op0, op1, selector }.  NOTE(review): line-sampled -- the
   selector unpacking into SEL, perm[] stores and returns are elided.  */
51633 ix86_expand_vec_perm_const (rtx operands[4])
51635 struct expand_vec_perm_d d;
51636 unsigned char perm[MAX_VECT_LEN];
51641 d.target = operands[0];
51642 d.op0 = operands[1];
51643 d.op1 = operands[2];
51646 d.vmode = GET_MODE (d.target);
51647 gcc_assert (VECTOR_MODE_P (d.vmode));
51648 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
51649 d.testing_p = false;
51651 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
51652 gcc_assert (XVECLEN (sel, 0) == nelt);
51653 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
/* Wrap each selector element into [0, 2*nelt); the stores into
   d.perm/perm are elided from view.  */
51655 for (i = 0; i < nelt; ++i)
51657 rtx e = XVECEXP (sel, 0, i);
51658 int ei = INTVAL (e) & (2 * nelt - 1);
51663 two_args = canonicalize_perm (&d);
51665 if (ix86_expand_vec_perm_const_1 (&d))
51668 /* If the selector says both arguments are needed, but the operands are the
51669 same, the above tried to expand with one_operand_p and flattened selector.
51670 If that didn't work, retry without one_operand_p; we succeeded with that
51672 if (two_args && d.one_operand_p)
51674 d.one_operand_p = false;
51675 memcpy (d.perm, perm, sizeof (perm));
51676 return ix86_expand_vec_perm_const_1 (&d);
51682 /* Implement targetm.vectorize.vec_perm_const_ok. */
/* NOTE(review): line-sampled -- the switch on VMODE, its case labels and
   `return true' lines are elided; each comment below documents the
   ISA shortcut taken for the (invisible) preceding case label.  */
51685 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
51686 const unsigned char *sel)
51688 struct expand_vec_perm_d d;
51689 unsigned int i, nelt, which;
51693 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
51694 d.testing_p = true;
51696 /* Given sufficient ISA support we can just return true here
51697 for selected vector modes. */
51704 if (TARGET_AVX512F)
51705 /* All implementable with a single vpermi2 insn. */
51709 if (TARGET_AVX512BW)
51710 /* All implementable with a single vpermi2 insn. */
51714 if (TARGET_AVX512BW)
51715 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
51722 if (TARGET_AVX512VL)
51723 /* All implementable with a single vpermi2 insn. */
51728 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
51733 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
51740 /* All implementable with a single vpperm insn. */
51743 /* All implementable with 2 pshufb + 1 ior. */
51749 /* All implementable with shufpd or unpck[lh]pd. */
51755 /* Extract the values from the vector CST into the permutation
51757 memcpy (d.perm, sel, nelt);
51758 for (i = which = 0; i < nelt; ++i)
51760 unsigned char e = d.perm[i];
51761 gcc_assert (e < 2 * nelt);
51762 which |= (e < nelt ? 1 : 2);
51765 /* For all elements from second vector, fold the elements to first. */
51767 for (i = 0; i < nelt; ++i)
51770 /* Check whether the mask can be applied to the vector type. */
51771 d.one_operand_p = (which != 3);
51773 /* Implementable with shufps or pshufd. */
51774 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
51777 /* Otherwise we have to go through the motions and see if we can
51778 figure out how to generate the requested permutation. */
/* Raw virtual registers: no insns are actually emitted in testing_p
   mode, only feasibility is probed.  */
51779 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
51780 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
51781 if (!d.one_operand_p)
51782 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
51785 ret = ix86_expand_vec_perm_const_1 (&d);
/* Expand an even/odd element extraction from OP0/OP1 into TARG.
   NOTE(review): line-sampled -- the d.target/d.op0/d.op1 assignments
   and intervening braces are elided from view.  */
51792 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
51794 struct expand_vec_perm_d d;
51800 d.vmode = GET_MODE (targ);
51801 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
51802 d.one_operand_p = false;
51803 d.testing_p = false;
/* Build the { odd, odd+2, odd+4, ... } selector.  */
51805 for (i = 0; i < nelt; ++i)
51806 d.perm[i] = i * 2 + odd;
51808 /* We'll either be able to implement the permutation directly... */
51809 if (expand_vec_perm_1 (&d))
51812 /* ... or we use the special-case patterns. */
51813 expand_vec_perm_even_odd_1 (&d, odd);
/* Expand an interleave (low half if !HIGH_P, high half if HIGH_P) of
   OP0/OP1 into TARG.  NOTE(review): line-sampled -- operand field
   assignments and the trailing gcc_assert are elided.  */
51817 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
51819 struct expand_vec_perm_d d;
51820 unsigned i, nelt, base;
51826 d.vmode = GET_MODE (targ);
51827 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
51828 d.one_operand_p = false;
51829 d.testing_p = false;
/* Selector alternates op0 and op1 elements starting at BASE.  */
51831 base = high_p ? nelt / 2 : 0;
51832 for (i = 0; i < nelt / 2; ++i)
51834 d.perm[i * 2] = i + base;
51835 d.perm[i * 2 + 1] = i + base + nelt;
51838 /* Note that for AVX this isn't one instruction. */
51839 ok = ix86_expand_vec_perm_const_1 (&d);
51844 /* Expand a vector operation CODE for a V*QImode in terms of the
51845 same operation on V*HImode. */
/* NOTE(review): line-sampled -- the switch labels on qimode/code and
   several braces are elided; comments mark the (invisible) case
   boundaries where they can be inferred.  */
51848 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
51850 machine_mode qimode = GET_MODE (dest);
51851 machine_mode himode;
51852 rtx (*gen_il) (rtx, rtx, rtx);
51853 rtx (*gen_ih) (rtx, rtx, rtx);
51854 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
51855 struct expand_vec_perm_d d;
51856 bool ok, full_interleave;
51857 bool uns_p = false;
/* V16QI case (label elided): himode presumably V8HImode.  */
51864 gen_il = gen_vec_interleave_lowv16qi;
51865 gen_ih = gen_vec_interleave_highv16qi;
/* V32QI case.  */
51868 himode = V16HImode;
51869 gen_il = gen_avx2_interleave_lowv32qi;
51870 gen_ih = gen_avx2_interleave_highv32qi;
/* V64QI case.  */
51873 himode = V32HImode;
51874 gen_il = gen_avx512bw_interleave_lowv64qi;
51875 gen_ih = gen_avx512bw_interleave_highv64qi;
51878 gcc_unreachable ();
/* MULT path (case label elided): both operands need unpacking.  */
51881 op2_l = op2_h = op2;
51885 /* Unpack data such that we've got a source byte in each low byte of
51886 each word. We don't care what goes into the high byte of each word.
51887 Rather than trying to get zero in there, most convenient is to let
51888 it be a copy of the low byte. */
51889 op2_l = gen_reg_rtx (qimode);
51890 op2_h = gen_reg_rtx (qimode);
51891 emit_insn (gen_il (op2_l, op2, op2));
51892 emit_insn (gen_ih (op2_h, op2, op2));
51895 op1_l = gen_reg_rtx (qimode);
51896 op1_h = gen_reg_rtx (qimode);
51897 emit_insn (gen_il (op1_l, op1, op1));
51898 emit_insn (gen_ih (op1_h, op1, op1));
51899 full_interleave = qimode == V16QImode;
/* Shift path (case labels elided): sign/zero-extend via sse_unpack.  */
51907 op1_l = gen_reg_rtx (himode);
51908 op1_h = gen_reg_rtx (himode);
51909 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
51910 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
51911 full_interleave = true;
51914 gcc_unreachable ();
51917 /* Perform the operation. */
51918 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
51920 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
51922 gcc_assert (res_l && res_h);
51924 /* Merge the data back into the right place. */
51926 d.op0 = gen_lowpart (qimode, res_l);
51927 d.op1 = gen_lowpart (qimode, res_h);
51929 d.nelt = GET_MODE_NUNITS (qimode);
51930 d.one_operand_p = false;
51931 d.testing_p = false;
51933 if (full_interleave)
51935 /* For SSE2, we used an full interleave, so the desired
51936 results are in the even elements. */
51937 for (i = 0; i < d.nelt; ++i)
51942 /* For AVX, the interleave used above was not cross-lane. So the
51943 extraction is evens but with the second and third quarter swapped.
51944 Happily, that is even one insn shorter than even extraction.
51945 For AVX512BW we have 4 lanes. We extract evens from within a lane,
51946 always first from the first and then from the second source operand,
51947 the index bits above the low 4 bits remains the same.
51948 Thus, for d.nelt == 32 we want permutation
51949 0,2,4,..14, 32,34,36,..46, 16,18,20,..30, 48,50,52,..62
51950 and for d.nelt == 64 we want permutation
51951 0,2,4,..14, 64,66,68,..78, 16,18,20,..30, 80,82,84,..94,
51952 32,34,36,..46, 96,98,100,..110, 48,50,52,..62, 112,114,116,..126. */
51953 for (i = 0; i < d.nelt; ++i)
51954 d.perm[i] = ((i * 2) & 14) + ((i & 8) ? d.nelt : 0) + (i & ~15);
51957 ok = ix86_expand_vec_perm_const_1 (&d);
/* Attach a REG_EQUAL note describing the whole QImode operation.  */
51960 set_unique_reg_note (get_last_insn (), REG_EQUAL,
51961 gen_rtx_fmt_ee (code, qimode, op1, op2));
51964 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
51965 if op is CONST_VECTOR with all odd elements equal to their
51966 preceding element. */
/* NOTE(review): line-sampled -- the `return false'/`return true' lines
   are elided from view.  */
51969 const_vector_equal_evenodd_p (rtx op)
51971 machine_mode mode = GET_MODE (op);
51972 int i, nunits = GET_MODE_NUNITS (mode);
51973 if (GET_CODE (op) != CONST_VECTOR
51974 || nunits != CONST_VECTOR_NUNITS (op))
/* Pointer equality suffices here: CONST_VECTOR elements are shared
   rtxes (presumably -- TODO confirm).  */
51976 for (i = 0; i < nunits; i += 2)
51977 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
/* Expand a widening multiply of the even (or, if ODD_P, odd) elements
   of OP1 x OP2 into DEST.  UNS_P selects unsigned vs signed widening.
   NOTE(review): line-sampled -- braces, returns and the `rtx x'
   declaration are elided from view.  */
51983 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
51984 bool uns_p, bool odd_p)
51986 machine_mode mode = GET_MODE (op1);
51987 machine_mode wmode = GET_MODE (dest);
51989 rtx orig_op1 = op1, orig_op2 = op2;
51991 if (!nonimmediate_operand (op1, mode))
51992 op1 = force_reg (mode, op1);
51993 if (!nonimmediate_operand (op2, mode))
51994 op2 = force_reg (mode, op2);
51996 /* We only play even/odd games with vectors of SImode. */
51997 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode)
51999 /* If we're looking for the odd results, shift those members down to
52000 the even slots. For some cpus this is faster than a PSHUFD. */
52003 /* For XOP use vpmacsdqh, but only for smult, as it is only
52005 if (TARGET_XOP && mode == V4SImode && !uns_p)
52007 x = force_reg (wmode, CONST0_RTX (wmode));
52008 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
/* Shift both operands right by one element width so the odd elements
   occupy the even slots; skip the shift when a const vector already
   duplicates each pair.  */
52012 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
52013 if (!const_vector_equal_evenodd_p (orig_op1))
52014 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
52015 x, NULL, 1, OPTAB_DIRECT);
52016 if (!const_vector_equal_evenodd_p (orig_op2))
52017 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
52018 x, NULL, 1, OPTAB_DIRECT);
52019 op1 = gen_lowpart (mode, op1);
52020 op2 = gen_lowpart (mode, op2);
/* Pick the widening even-multiply pattern for the mode/signedness.  */
52023 if (mode == V16SImode)
52026 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
52028 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
52030 else if (mode == V8SImode)
52033 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
52035 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
52038 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
52039 else if (TARGET_SSE4_1)
52040 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
52043 rtx s1, s2, t0, t1, t2;
52045 /* The easiest way to implement this without PMULDQ is to go through
52046 the motions as if we are performing a full 64-bit multiply. With
52047 the exception that we need to do less shuffling of the elements. */
52049 /* Compute the sign-extension, aka highparts, of the two operands. */
52050 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
52051 op1, pc_rtx, pc_rtx);
52052 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
52053 op2, pc_rtx, pc_rtx);
52055 /* Multiply LO(A) * HI(B), and vice-versa. */
52056 t1 = gen_reg_rtx (wmode);
52057 t2 = gen_reg_rtx (wmode);
52058 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
52059 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
52061 /* Multiply LO(A) * LO(B). */
52062 t0 = gen_reg_rtx (wmode);
52063 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
52065 /* Combine and shift the highparts into place. */
52066 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
52067 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
52070 /* Combine high and low parts. */
52071 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
/* Expand a widening multiply of OP1 x OP2 into DEST, producing the
   widened products of either the high (HIGH_P) or low vector halves.
   UNS_P selects unsigned (true) vs signed (false) multiplication.
   DEST has the wide mode; OP1/OP2 carry the narrow element mode.  */
52078 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
52079 bool uns_p, bool high_p)
52081 machine_mode wmode = GET_MODE (dest);
52082 machine_mode mode = GET_MODE (op1);
52083 rtx t1, t2, t3, t4, mask;
52088 t1 = gen_reg_rtx (mode);
52089 t2 = gen_reg_rtx (mode);
52090 if (TARGET_XOP && !uns_p)
52092 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
52093 shuffle the elements once so that all elements are in the right
52094 place for immediate use: { A C B D }. */
52095 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
52096 const1_rtx, GEN_INT (3)));
52097 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
52098 const1_rtx, GEN_INT (3)));
52102 /* Put the elements into place for the multiply. */
52103 ix86_expand_vec_interleave (t1, op1, op1, high_p);
52104 ix86_expand_vec_interleave (t2, op2, op2, high_p);
52107 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
52111 /* Shuffle the elements between the lanes. After this we
52112 have { A B E F | C D G H } for each operand. */
52113 t1 = gen_reg_rtx (V4DImode);
52114 t2 = gen_reg_rtx (V4DImode);
52115 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
52116 const0_rtx, const2_rtx,
52117 const1_rtx, GEN_INT (3)));
52118 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
52119 const0_rtx, const2_rtx,
52120 const1_rtx, GEN_INT (3)));
52122 /* Shuffle the elements within the lanes. After this we
52123 have { A A B B | C C D D } or { E E F F | G G H H }. */
52124 t3 = gen_reg_rtx (V8SImode);
52125 t4 = gen_reg_rtx (V8SImode);
/* Immediate for vpshufd: replicate elements 2,3 (high) or 0,1 (low).  */
52126 mask = GEN_INT (high_p
52127 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
52128 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
52129 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
52130 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
52132 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
/* Fallback: compute full low product and a highpart multiply, then
   interleave low/high 16-bit halves to form the widened results.  */
52137 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
52138 uns_p, OPTAB_DIRECT);
52139 t2 = expand_binop (mode,
52140 uns_p ? umul_highpart_optab : smul_highpart_optab,
52141 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
52142 gcc_assert (t1 && t2);
52144 t3 = gen_reg_rtx (mode);
52145 ix86_expand_vec_interleave (t3, t1, t2, high_p);
52146 emit_move_insn (dest, gen_lowpart (wmode, t3));
/* Generic path: sign/zero-unpack both operands to the wide mode and
   emit a plain MULT of the widened values.  */
52154 t1 = gen_reg_rtx (wmode);
52155 t2 = gen_reg_rtx (wmode);
52156 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
52157 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
52159 emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2)));
52163 gcc_unreachable ();
/* Expand a V4SI multiply (OP0 = OP1 * OP2) using only SSE2, by
   combining the even and odd widened 32x32->64 products.  */
52168 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
52170 rtx res_1, res_2, res_3, res_4;
52172 res_1 = gen_reg_rtx (V4SImode);
52173 res_2 = gen_reg_rtx (V4SImode);
52174 res_3 = gen_reg_rtx (V2DImode);
52175 res_4 = gen_reg_rtx (V2DImode);
/* res_3 = even-element products, res_4 = odd-element products.  */
52176 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
52177 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
52179 /* Move the results in element 2 down to element 1; we don't care
52180 what goes in elements 2 and 3. Then we can merge the parts
52181 back together with an interleave.
52183 Note that two other sequences were tried:
52184 (1) Use interleaves at the start instead of psrldq, which allows
52185 us to use a single shufps to merge things back at the end.
52186 (2) Use shufps here to combine the two vectors, then pshufd to
52187 put the elements in the correct order.
52188 In both cases the cost of the reformatting stall was too high
52189 and the overall sequence slower. */
52191 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
52192 const0_rtx, const2_rtx,
52193 const0_rtx, const0_rtx));
52194 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
52195 const0_rtx, const2_rtx,
52196 const0_rtx, const0_rtx));
52197 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
/* Attach a REG_EQUAL note so the optimizers know the whole sequence
   computes op1 * op2.  */
52199 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
/* Expand a V2DI/V4DI/V8DI multiply (OP0 = OP1 * OP2).  Uses a direct
   instruction when AVX512DQ (plus AVX512VL for the narrower modes) is
   available, an XOP-specific sequence for V2DImode, and otherwise a
   generic decomposition via unsigned widening even multiplies.  */
52203 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
52205 machine_mode mode = GET_MODE (op0);
52206 rtx t1, t2, t3, t4, t5, t6;
52208 if (TARGET_AVX512DQ && mode == V8DImode)
52209 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
52210 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
52211 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
52212 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
52213 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
52214 else if (TARGET_XOP && mode == V2DImode)
52216 /* op1: A,B,C,D, op2: E,F,G,H */
52217 op1 = gen_lowpart (V4SImode, op1);
52218 op2 = gen_lowpart (V4SImode, op2);
52220 t1 = gen_reg_rtx (V4SImode);
52221 t2 = gen_reg_rtx (V4SImode);
52222 t3 = gen_reg_rtx (V2DImode);
52223 t4 = gen_reg_rtx (V2DImode);
/* Swap the 32-bit halves of each 64-bit element of op1.  */
52226 emit_insn (gen_sse2_pshufd_1 (t1, op1,
52232 /* t2: (B*E),(A*F),(D*G),(C*H) */
52233 emit_insn (gen_mulv4si3 (t2, t1, op2));
52235 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
52236 emit_insn (gen_xop_phadddq (t3, t2));
52238 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
52239 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
52241 /* Multiply lower parts and add all */
52242 t5 = gen_reg_rtx (V2DImode);
52243 emit_insn (gen_vec_widen_umult_even_v4si (t5,
52244 gen_lowpart (V4SImode, op1),
52245 gen_lowpart (V4SImode, op2)));
52246 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
/* Generic path: pick the widening even-multiply pattern matching the
   vector width; NMODE is the corresponding narrow (SImode-element)
   vector mode.  */
52251 machine_mode nmode;
52252 rtx (*umul) (rtx, rtx, rtx);
52254 if (mode == V2DImode)
52256 umul = gen_vec_widen_umult_even_v4si;
52259 else if (mode == V4DImode)
52261 umul = gen_vec_widen_umult_even_v8si;
52264 else if (mode == V8DImode)
52266 umul = gen_vec_widen_umult_even_v16si;
52270 gcc_unreachable ();
52273 /* Multiply low parts. */
52274 t1 = gen_reg_rtx (mode);
52275 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
52277 /* Shift input vectors right 32 bits so we can multiply high parts. */
52279 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
52280 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
52282 /* Multiply high parts by low parts. */
52283 t4 = gen_reg_rtx (mode);
52284 t5 = gen_reg_rtx (mode);
52285 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
52286 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
52288 /* Combine and shift the highparts back. */
52289 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
52290 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
52292 /* Combine high and low parts. */
52293 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
/* Record the overall product in a REG_EQUAL note on the last insn.  */
52296 set_unique_reg_note (get_last_insn (), REG_EQUAL,
52297 gen_rtx_MULT (mode, op1, op2));
52300 /* Return 1 if control transfer instruction INSN
52301 should be encoded with bnd prefix.
52302 If insn is NULL then return 1 when control
52303 transfer instructions should be prefixed with
52304 bnd by default for current function. */
/* See the comment above: decide whether INSN (or, when INSN is NULL,
   control-transfer insns of the current function in general) should
   carry the MPX "bnd" prefix.  */
52307 ix86_bnd_prefixed_insn_p (rtx insn)
52309 /* For call insns check special flag. */
52310 if (insn && CALL_P (insn))
52312 rtx call = get_call_rtx_from (insn);
/* NOTE(review): CALL_EXPR_WITH_BOUNDS_P is applied to the rtx from
   get_call_rtx_from here — presumably a chkp-specific flag accessor;
   confirm against tree-chkp.h.  */
52314 return CALL_EXPR_WITH_BOUNDS_P (call);
52317 /* All other insns are prefixed only if function is instrumented. */
52318 return chkp_function_instrumented_p (current_function_decl);
52321 /* Calculate integer abs() using only SSE2 instructions. */
/* Expand integer vector abs (TARGET = |INPUT|) using only SSE2
   instructions; strategy depends on the element width of the mode.  */
52324 ix86_expand_sse2_abs (rtx target, rtx input)
52326 machine_mode mode = GET_MODE (target);
52331 /* For 32-bit signed integer X, the best way to calculate the absolute
52332 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
52334 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
52335 GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
52336 NULL, 0, OPTAB_DIRECT);
52337 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
52338 NULL, 0, OPTAB_DIRECT);
52339 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
52340 target, 0, OPTAB_DIRECT);
52343 /* For 16-bit signed integer X, the best way to calculate the absolute
52344 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
52346 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
52348 x = expand_simple_binop (mode, SMAX, tmp0, input,
52349 target, 0, OPTAB_DIRECT);
52352 /* For 8-bit signed integer X, the best way to calculate the absolute
52353 value of X is min ((unsigned char) X, (unsigned char) (-X)),
52354 as SSE2 provides the PMINUB insn. */
52356 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
52358 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
52359 target, 0, OPTAB_DIRECT);
52363 gcc_unreachable ();
/* expand_simple_binop may not have produced the result directly in
   TARGET; copy it there if needed.  */
52367 emit_move_insn (target, x);
52370 /* Expand an extract from a vector register through pextr insn.
52371 Return true if successful. */
/* See the comment above: expand a bitfield extract from a vector
   register via the pextrb/w/d/q instructions.  OPERANDS[0] is the
   destination, OPERANDS[1] the source vector, OPERANDS[2] the field
   size in bits and OPERANDS[3] the bit position.  Returns true on
   success, false if the operands cannot be handled.  */
52374 ix86_expand_pextr (rtx *operands)
52376 rtx dst = operands[0];
52377 rtx src = operands[1];
52379 unsigned int size = INTVAL (operands[2]);
52380 unsigned int pos = INTVAL (operands[3]);
52382 if (SUBREG_P (dst))
52384 /* Reject non-lowpart subregs. */
52385 if (SUBREG_BYTE (dst) > 0)
52387 dst = SUBREG_REG (dst);
52390 if (SUBREG_P (src))
/* Fold the subreg offset into the extraction position.  */
52392 pos += SUBREG_BYTE (src) * BITS_PER_UNIT;
52393 src = SUBREG_REG (src);
52396 switch (GET_MODE (src))
52405 machine_mode srcmode, dstmode;
52408 dstmode = mode_for_size (size, MODE_INT, 0);
/* Byte extracts (pextrb) require SSE4.1; word extracts (pextrw)
   are available in base SSE2.  */
52413 if (!TARGET_SSE4_1)
52415 srcmode = V16QImode;
52421 srcmode = V8HImode;
52425 if (!TARGET_SSE4_1)
52427 srcmode = V4SImode;
/* pextrq additionally requires a 64-bit target.  */
52431 gcc_assert (TARGET_64BIT);
52432 if (!TARGET_SSE4_1)
52434 srcmode = V2DImode;
52441 /* Reject extractions from misaligned positions. */
52442 if (pos & (size-1))
52445 if (GET_MODE (dst) == dstmode)
52448 d = gen_reg_rtx (dstmode);
52450 /* Construct insn pattern. */
52451 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (pos / size)));
52452 pat = gen_rtx_VEC_SELECT (dstmode, gen_lowpart (srcmode, src), pat);
52454 /* Let the rtl optimizers know about the zero extension performed. */
52455 if (dstmode == QImode || dstmode == HImode)
52457 pat = gen_rtx_ZERO_EXTEND (SImode, pat);
52458 d = gen_lowpart (SImode, d);
52461 emit_insn (gen_rtx_SET (d, pat));
52464 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
52473 /* Expand an insert into a vector register through pinsr insn.
52474 Return true if successful. */
/* See the comment above: expand a bitfield insert into a vector
   register via the pinsrb/w/d/q instructions.  OPERANDS[0] is the
   destination vector, OPERANDS[3] the value to insert, OPERANDS[1]
   the field size in bits and OPERANDS[2] the bit position.  Returns
   true on success, false if the operands cannot be handled.  */
52477 ix86_expand_pinsr (rtx *operands)
52479 rtx dst = operands[0];
52480 rtx src = operands[3];
52482 unsigned int size = INTVAL (operands[1]);
52483 unsigned int pos = INTVAL (operands[2]);
52485 if (SUBREG_P (dst))
/* Fold the subreg offset into the insertion position.  */
52487 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
52488 dst = SUBREG_REG (dst);
52491 switch (GET_MODE (dst))
52500 machine_mode srcmode, dstmode;
52501 rtx (*pinsr)(rtx, rtx, rtx, rtx);
52504 srcmode = mode_for_size (size, MODE_INT, 0);
/* pinsrb requires SSE4.1; pinsrw is base SSE2; pinsrd/q require
   SSE4.1 (and pinsrq a 64-bit target).  */
52509 if (!TARGET_SSE4_1)
52511 dstmode = V16QImode;
52512 pinsr = gen_sse4_1_pinsrb;
52518 dstmode = V8HImode;
52519 pinsr = gen_sse2_pinsrw;
52523 if (!TARGET_SSE4_1)
52525 dstmode = V4SImode;
52526 pinsr = gen_sse4_1_pinsrd;
52530 gcc_assert (TARGET_64BIT);
52531 if (!TARGET_SSE4_1)
52533 dstmode = V2DImode;
52534 pinsr = gen_sse4_1_pinsrq;
52541 /* Reject insertions to misaligned positions. */
52542 if (pos & (size-1))
52545 if (SUBREG_P (src))
52547 unsigned int srcpos = SUBREG_BYTE (src);
/* Non-lowpart source subreg: extract the relevant piece first via
   ix86_expand_pextr into a fresh register.  */
52553 extr_ops[0] = gen_reg_rtx (srcmode);
52554 extr_ops[1] = gen_lowpart (srcmode, SUBREG_REG (src));
52555 extr_ops[2] = GEN_INT (size);
52556 extr_ops[3] = GEN_INT (srcpos * BITS_PER_UNIT);
52558 if (!ix86_expand_pextr (extr_ops))
52564 src = gen_lowpart (srcmode, SUBREG_REG (src));
52567 if (GET_MODE (dst) == dstmode)
52570 d = gen_reg_rtx (dstmode);
52572 emit_insn (pinsr (d, gen_lowpart (dstmode, dst),
52573 gen_lowpart (srcmode, src),
52574 GEN_INT (1 << (pos / size))));
52576 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
52585 /* This function returns the calling abi specific va_list type node.
52586 It returns the FNDECL specific va_list type. */
/* See the comment above: return the va_list type node matching the
   calling ABI of FNDECL (ms_va_list for MS_ABI, sysv otherwise).  */
52589 ix86_fn_abi_va_list (tree fndecl)
52592 return va_list_type_node;
52593 gcc_assert (fndecl != NULL_TREE);
52595 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
52596 return ms_va_list_type_node;
52598 return sysv_va_list_type_node;
52601 /* Returns the canonical va_list type specified by TYPE. If there
52602 is no valid TYPE provided, it return NULL_TREE. */
/* See the comment above: map TYPE to the canonical va_list type node
   it corresponds to (generic, sysv, or ms), unwrapping pointers/
   references and array decay; falls back to the generic handler.  */
52605 ix86_canonical_va_list_type (tree type)
52609 /* Resolve references and pointers to va_list type. */
52610 if (TREE_CODE (type) == MEM_REF)
52611 type = TREE_TYPE (type);
52612 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
52613 type = TREE_TYPE (type);
52614 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
52615 type = TREE_TYPE (type);
52617 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
52619 wtype = va_list_type_node;
52620 gcc_assert (wtype != NULL_TREE);
52622 if (TREE_CODE (wtype) == ARRAY_TYPE)
52624 /* If va_list is an array type, the argument may have decayed
52625 to a pointer type, e.g. by being passed to another function.
52626 In that case, unwrap both types so that we can compare the
52627 underlying records. */
52628 if (TREE_CODE (htype) == ARRAY_TYPE
52629 || POINTER_TYPE_P (htype))
52631 wtype = TREE_TYPE (wtype);
52632 htype = TREE_TYPE (htype);
52635 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
52636 return va_list_type_node;
/* Same comparison against the SYSV va_list type.  */
52637 wtype = sysv_va_list_type_node;
52638 gcc_assert (wtype != NULL_TREE);
52640 if (TREE_CODE (wtype) == ARRAY_TYPE)
52642 /* If va_list is an array type, the argument may have decayed
52643 to a pointer type, e.g. by being passed to another function.
52644 In that case, unwrap both types so that we can compare the
52645 underlying records. */
52646 if (TREE_CODE (htype) == ARRAY_TYPE
52647 || POINTER_TYPE_P (htype))
52649 wtype = TREE_TYPE (wtype);
52650 htype = TREE_TYPE (htype);
52653 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
52654 return sysv_va_list_type_node;
/* And finally against the MS va_list type.  */
52655 wtype = ms_va_list_type_node;
52656 gcc_assert (wtype != NULL_TREE);
52658 if (TREE_CODE (wtype) == ARRAY_TYPE)
52660 /* If va_list is an array type, the argument may have decayed
52661 to a pointer type, e.g. by being passed to another function.
52662 In that case, unwrap both types so that we can compare the
52663 underlying records. */
52664 if (TREE_CODE (htype) == ARRAY_TYPE
52665 || POINTER_TYPE_P (htype))
52667 wtype = TREE_TYPE (wtype);
52668 htype = TREE_TYPE (htype);
52671 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
52672 return ms_va_list_type_node;
/* Not one of the target-specific va_list types; use the default.  */
52675 return std_canonical_va_list_type (type);
52678 /* Iterate through the target-specific builtin types for va_list.
52679 IDX denotes the iterator, *PTREE is set to the result type of
52680 the va_list builtin, and *PNAME to its internal type.
52681 Returns zero if there is no element for this index, otherwise
52682 IDX should be increased upon the next call.
52683 Note, do not iterate a base builtin's name like __builtin_va_list.
52684 Used from c_common_nodes_and_builtins. */
/* See the comment above: iterator over the target-specific va_list
   builtin types; IDX selects the entry, *PTREE/*PNAME receive the
   type node and its builtin name.  */
52687 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
52697 *ptree = ms_va_list_type_node;
52698 *pname = "__builtin_ms_va_list";
52702 *ptree = sysv_va_list_type_node;
52703 *pname = "__builtin_sysv_va_list";
52711 #undef TARGET_SCHED_DISPATCH
52712 #define TARGET_SCHED_DISPATCH has_dispatch
52713 #undef TARGET_SCHED_DISPATCH_DO
52714 #define TARGET_SCHED_DISPATCH_DO do_dispatch
52715 #undef TARGET_SCHED_REASSOCIATION_WIDTH
52716 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
52717 #undef TARGET_SCHED_REORDER
52718 #define TARGET_SCHED_REORDER ix86_sched_reorder
52719 #undef TARGET_SCHED_ADJUST_PRIORITY
52720 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
52721 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
52722 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
52723 ix86_dependencies_evaluation_hook
52725 /* The size of the dispatch window is the total number of bytes of
52726 object code allowed in a window. */
52727 #define DISPATCH_WINDOW_SIZE 16
52729 /* Number of dispatch windows considered for scheduling. */
52730 #define MAX_DISPATCH_WINDOWS 3
52732 /* Maximum number of instructions in a window. */
52735 /* Maximum number of immediate operands in a window. */
52738 /* Maximum number of immediate bits allowed in a window. */
52739 #define MAX_IMM_SIZE 128
52741 /* Maximum number of 32 bit immediates allowed in a window. */
52742 #define MAX_IMM_32 4
52744 /* Maximum number of 64 bit immediates allowed in a window. */
52745 #define MAX_IMM_64 2
52747 /* Maximum total of loads or prefetches allowed in a window. */
52750 /* Maximum total of stores allowed in a window. */
52751 #define MAX_STORE 1
52757 /* Dispatch groups.  Instructions that affect the mix in a dispatch window. */
52758 enum dispatch_group {
52773 /* Number of allowable groups in a dispatch window. It is an array
52774 indexed by dispatch_group enum. 100 is used as a big number,
52775 because the number of these kind of operations does not have any
52776 effect in dispatch window, but we need them for other reasons in
52778 static unsigned int num_allowable_groups[disp_last] = {
52779 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
52782 char group_name[disp_last + 1][16] = {
52783 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
52784 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
52785 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
52788 /* Instruction path. */
52791 path_single, /* Single micro op. */
52792 path_double, /* Double micro op. */
52793 path_multi, /* Instructions with more than 2 micro op.. */
52797 /* sched_insn_info defines a window to the instructions scheduled in
52798 the basic block. It contains a pointer to the insn_info table and
52799 the instruction scheduled.
52801 Windows are allocated for each basic block and are linked
52803 typedef struct sched_insn_info_s {
52805 enum dispatch_group group;
52806 enum insn_path path;
52811 /* Linked list of dispatch windows. This is a two way list of
52812 dispatch windows of a basic block. It contains information about
52813 the number of uops in the window and the total number of
52814 instructions and of bytes in the object code for this dispatch
52816 typedef struct dispatch_windows_s {
52817 int num_insn; /* Number of insn in the window. */
52818 int num_uops; /* Number of uops in the window. */
52819 int window_size; /* Number of bytes in the window. */
52820 int window_num; /* Window number between 0 or 1. */
52821 int num_imm; /* Number of immediates in an insn. */
52822 int num_imm_32; /* Number of 32 bit immediates in an insn. */
52823 int num_imm_64; /* Number of 64 bit immediates in an insn. */
52824 int imm_size; /* Total immediates in the window. */
52825 int num_loads; /* Total memory loads in the window. */
52826 int num_stores; /* Total memory stores in the window. */
52827 int violation; /* Violation exists in window. */
52828 sched_insn_info *window; /* Pointer to the window. */
52829 struct dispatch_windows_s *next;
52830 struct dispatch_windows_s *prev;
52831 } dispatch_windows;
52833 /* Immediate values used in an insn. */
52834 typedef struct imm_info_s
52841 static dispatch_windows *dispatch_window_list;
52842 static dispatch_windows *dispatch_window_list1;
52844 /* Get dispatch group of insn. */
static enum dispatch_group
52847 get_mem_group (rtx_insn *insn)
52849 enum attr_memory memory;
/* Unrecognized insns carry no memory attribute.  */
52851 if (INSN_CODE (insn) < 0)
52852 return disp_no_group;
/* Classify by the insn's "memory" attribute: store, load, or both.  */
52853 memory = get_attr_memory (insn);
52854 if (memory == MEMORY_STORE)
52857 if (memory == MEMORY_LOAD)
52860 if (memory == MEMORY_BOTH)
52861 return disp_load_store;
52863 return disp_no_group;
52866 /* Return true if insn is a compare instruction. */
/* Return true if INSN is a compare instruction: a test/icmp/fcmp type
   attribute, or a pattern that is literally a COMPARE.  */
52869 is_cmp (rtx_insn *insn)
52871 enum attr_type type;
52873 type = get_attr_type (insn);
52874 return (type == TYPE_TEST
52875 || type == TYPE_ICMP
52876 || type == TYPE_FCMP
52877 || GET_CODE (PATTERN (insn)) == COMPARE);
52880 /* Return true if a dispatch violation encountered. */
/* Return the violation flag of the last (most recent) dispatch window
   in the list.  */
52883 dispatch_violation (void)
52885 if (dispatch_window_list->next)
52886 return dispatch_window_list->next->violation;
52887 return dispatch_window_list->violation;
52890 /* Return true if insn is a branch instruction. */
/* Return true if INSN is a branch (call or jump) instruction.  */
52893 is_branch (rtx_insn *insn)
52895 return (CALL_P (insn) || JUMP_P (insn));
52898 /* Return true if insn is a prefetch instruction. */
/* Return true if INSN is a prefetch instruction (a non-jump insn whose
   pattern is a PREFETCH rtx).  */
52901 is_prefetch (rtx_insn *insn)
52903 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
52906 /* This function initializes a dispatch window and the list container holding a
52907 pointer to the window. */
/* Reset dispatch window WINDOW_NUM (0 or 1) to its empty state: zero
   all counters and clear every slot in the per-insn array.  */
52910 init_window (int window_num)
52913 dispatch_windows *new_list;
52915 if (window_num == 0)
52916 new_list = dispatch_window_list;
52918 new_list = dispatch_window_list1;
52920 new_list->num_insn = 0;
52921 new_list->num_uops = 0;
52922 new_list->window_size = 0;
52923 new_list->next = NULL;
52924 new_list->prev = NULL;
52925 new_list->window_num = window_num;
52926 new_list->num_imm = 0;
52927 new_list->num_imm_32 = 0;
52928 new_list->num_imm_64 = 0;
52929 new_list->imm_size = 0;
52930 new_list->num_loads = 0;
52931 new_list->num_stores = 0;
52932 new_list->violation = false;
/* Clear each instruction slot.  */
52934 for (i = 0; i < MAX_INSN; i++)
52936 new_list->window[i].insn = NULL;
52937 new_list->window[i].group = disp_no_group;
52938 new_list->window[i].path = no_path;
52939 new_list->window[i].byte_len = 0;
52940 new_list->window[i].imm_bytes = 0;
52945 /* This function allocates and initializes a dispatch window and the
52946 list container holding a pointer to the window. */
static dispatch_windows *
52949 allocate_window (void)
/* Allocate the list node and its insn array (one extra slot).  */
52951 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
52952 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
52957 /* This routine initializes the dispatch scheduling information. It
52958 initiates building dispatch scheduler tables and constructs the
52959 first dispatch window. */
/* Initialize dispatch scheduling: allocate both window list heads.  */
52962 init_dispatch_sched (void)
52964 /* Allocate a dispatch list and a window. */
52965 dispatch_window_list = allocate_window ();
52966 dispatch_window_list1 = allocate_window ();
52971 /* This function returns true if a branch is detected. End of a basic block
52972 does not have to be a branch, but here we assume only branches end a
/* Return true if GROUP marks the end of a basic block (see comment
   above: only branches are assumed to end a block here).  */
52976 is_end_basic_block (enum dispatch_group group)
52978 return group == disp_branch;
52981 /* This function is called when the end of a window processing is reached. */
/* Finish processing of the current window pair: sanity-check the insn
   counts and (for a two-window pair) that the combined size stays
   within the 48-byte limit.  */
52984 process_end_window (void)
52986 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
52987 if (dispatch_window_list->next)
52989 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
52990 gcc_assert (dispatch_window_list->window_size
52991 + dispatch_window_list1->window_size <= 48);
52997 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
52998 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
52999 for 48 bytes of instructions. Note that these windows are not dispatch
53000 windows that their sizes are DISPATCH_WINDOW_SIZE. */
static dispatch_windows *
53003 allocate_next_window (int window_num)
53005 if (window_num == 0)
53007 if (dispatch_window_list->next)
53010 return dispatch_window_list;
/* Chain window 1 after window 0 and return it.  */
53013 dispatch_window_list->next = dispatch_window_list1;
53014 dispatch_window_list1->prev = dispatch_window_list;
53016 return dispatch_window_list1;
53019 /* Compute number of immediate operands of an instruction. */
/* Walk all sub-rtxes of IN_RTX and count its immediate operands into
   IMM_VALUES: total (imm), 32-bit-representable (imm32) and 64-bit
   (imm64) immediates.  */
53022 find_constant (rtx in_rtx, imm_info *imm_values)
53024 if (INSN_P (in_rtx))
53025 in_rtx = PATTERN (in_rtx);
53026 subrtx_iterator::array_type array;
53027 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
53028 if (const_rtx x = *iter)
53029 switch (GET_CODE (x))
/* Integer constant: 32-bit if it fits SImode as an x86-64
   immediate, otherwise 64-bit.  */
53034 (imm_values->imm)++;
53035 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
53036 (imm_values->imm32)++;
53038 (imm_values->imm64)++;
53042 case CONST_WIDE_INT:
53043 (imm_values->imm)++;
53044 (imm_values->imm64)++;
/* Normal label references count as 32-bit immediates.  */
53048 if (LABEL_KIND (x) == LABEL_NORMAL)
53050 (imm_values->imm)++;
53051 (imm_values->imm32)++;
53060 /* Return total size of immediate operands of an instruction along with number
53061 of corresponding immediate-operands. It initializes its parameters to zero
53062 before calling FIND_CONSTANT.
53063 INSN is the input instruction. IMM is the total of immediates.
53064 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
/* See the comment above: return the total immediate size in bytes of
   INSN (4 per 32-bit, 8 per 64-bit immediate) and store the immediate
   counts into *IMM, *IMM32 and *IMM64.  */
53068 get_num_immediates (rtx_insn *insn, int *imm, int *imm32, int *imm64)
53070 imm_info imm_values = {0, 0, 0};
53072 find_constant (insn, &imm_values);
53073 *imm = imm_values.imm;
53074 *imm32 = imm_values.imm32;
53075 *imm64 = imm_values.imm64;
53076 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
53079 /* This function indicates if an operand of an instruction is an
/* Return nonzero (the total immediate byte size) if INSN has any
   immediate operand.  */
53083 has_immediate (rtx_insn *insn)
53085 int num_imm_operand;
53086 int num_imm32_operand;
53087 int num_imm64_operand;
53090 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
53091 &num_imm64_operand);
53095 /* Return single or double path for instructions. */
static enum insn_path
53098 get_insn_path (rtx_insn *insn)
/* Map the amdfam10_decode attribute onto the insn_path enum:
   0 -> single micro-op, 1 -> double, anything else -> multi.  */
53100 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
53102 if ((int)path == 0)
53103 return path_single;
53105 if ((int)path == 1)
53106 return path_double;
53111 /* Return insn dispatch group. */
static enum dispatch_group
53114 get_insn_group (rtx_insn *insn)
/* Classify INSN: memory group first, then branch, immediate-carrying,
   prefetch, else no group.  */
53116 enum dispatch_group group = get_mem_group (insn);
53120 if (is_branch (insn))
53121 return disp_branch;
53126 if (has_immediate (insn))
53129 if (is_prefetch (insn))
53130 return disp_prefetch;
53132 return disp_no_group;
53135 /* Count number of GROUP restricted instructions in a dispatch
53136 window WINDOW_LIST. */
/* See the comment above: count GROUP-restricted instructions — i.e.
   decide whether adding INSN to WINDOW_LIST would exceed the per-window
   limits on immediates (count/size/width mix) or loads/stores.  */
53139 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
53141 enum dispatch_group group = get_insn_group (insn);
53143 int num_imm_operand;
53144 int num_imm32_operand;
53145 int num_imm64_operand;
53147 if (group == disp_no_group)
53150 if (group == disp_imm)
53152 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
53153 &num_imm64_operand);
/* Reject when any immediate budget would be exceeded: total byte
   size, total count, the 32-bit slot budget (a 64-bit immediate
   consumes two 32-bit slots), the 64-bit count, or the special
   case of a full-size window with a 64-bit immediate.  */
53154 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
53155 || num_imm_operand + window_list->num_imm > MAX_IMM
53156 || (num_imm32_operand > 0
53157 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
53158 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
53159 || (num_imm64_operand > 0
53160 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
53161 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
53162 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
53163 && num_imm64_operand > 0
53164 && ((window_list->num_imm_64 > 0
53165 && window_list->num_insn >= 2)
53166 || window_list->num_insn >= 3)))
/* Memory groups: respect the load and store caps.  */
53172 if ((group == disp_load_store
53173 && (window_list->num_loads >= MAX_LOAD
53174 || window_list->num_stores >= MAX_STORE))
53175 || ((group == disp_load
53176 || group == disp_prefetch)
53177 && window_list->num_loads >= MAX_LOAD)
53178 || (group == disp_store
53179 && window_list->num_stores >= MAX_STORE))
53185 /* This function returns true if insn satisfies dispatch rules on the
53186 last window scheduled. */
/* See the comment above: return whether INSN satisfies the dispatch
   rules of the last scheduled window.  */
53189 fits_dispatch_window (rtx_insn *insn)
53191 dispatch_windows *window_list = dispatch_window_list;
53192 dispatch_windows *window_list_next = dispatch_window_list->next;
53193 unsigned int num_restrict;
53194 enum dispatch_group group = get_insn_group (insn);
53195 enum insn_path path = get_insn_path (insn);
53198 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
53199 instructions should be given the lowest priority in the
53200 scheduling process in Haifa scheduler to make sure they will be
53201 scheduled in the same dispatch window as the reference to them. */
53202 if (group == disp_jcc || group == disp_cmp)
53205 /* Check nonrestricted. */
53206 if (group == disp_no_group || group == disp_branch)
53209 /* Get last dispatch window. */
53210 if (window_list_next)
53211 window_list = window_list_next;
53213 if (window_list->window_num == 1)
/* Window 1 shares the 48-byte pair budget with window 0.  */
53215 sum = window_list->prev->window_size + window_list->window_size;
53218 || (min_insn_size (insn) + sum) >= 48)
53219 /* Window 1 is full. Go for next window. */
53223 num_restrict = count_num_restricted (insn, window_list);
53225 if (num_restrict > num_allowable_groups[group])
53228 /* See if it fits in the first window. */
53229 if (window_list->window_num == 0)
53231 /* The first window should have only single and double path
53233 if (path == path_double
53234 && (window_list->num_uops + 2) > MAX_INSN)
53236 else if (path != path_single)
53242 /* Add an instruction INSN with NUM_UOPS micro-operations to the
53243 dispatch window WINDOW_LIST. */
/* See the comment above: record INSN (with NUM_UOPS micro-ops) into
   WINDOW_LIST, updating all of the window's counters.  */
53246 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
53248 int byte_len = min_insn_size (insn);
53249 int num_insn = window_list->num_insn;
53251 sched_insn_info *window = window_list->window;
53252 enum dispatch_group group = get_insn_group (insn);
53253 enum insn_path path = get_insn_path (insn);
53254 int num_imm_operand;
53255 int num_imm32_operand;
53256 int num_imm64_operand;
/* Latch a dispatch violation on the window if the insn does not fit
   (compares are exempt — they are deliberately scheduled late).  */
53258 if (!window_list->violation && group != disp_cmp
53259 && !fits_dispatch_window (insn))
53260 window_list->violation = true;
53262 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
53263 &num_imm64_operand);
53265 /* Initialize window with new instruction. */
53266 window[num_insn].insn = insn;
53267 window[num_insn].byte_len = byte_len;
53268 window[num_insn].group = group;
53269 window[num_insn].path = path;
53270 window[num_insn].imm_bytes = imm_size;
/* Update the window's aggregate counters.  */
53272 window_list->window_size += byte_len;
53273 window_list->num_insn = num_insn + 1;
53274 window_list->num_uops = window_list->num_uops + num_uops;
53275 window_list->imm_size += imm_size;
53276 window_list->num_imm += num_imm_operand;
53277 window_list->num_imm_32 += num_imm32_operand;
53278 window_list->num_imm_64 += num_imm64_operand;
/* A prefetch counts as a load; load_store counts as both.  */
53280 if (group == disp_store)
53281 window_list->num_stores += 1;
53282 else if (group == disp_load
53283 || group == disp_prefetch)
53284 window_list->num_loads += 1;
53285 else if (group == disp_load_store)
53287 window_list->num_stores += 1;
53288 window_list->num_loads += 1;
53292 /* Adds a scheduled instruction, INSN, to the current dispatch window.
53293 If the total bytes of instructions or the number of instructions in
53294 the window exceed allowable, it allocates a new window. */
/* See the comment above: add the scheduled INSN to the current
   dispatch window, rolling over to a new window when the byte or
   instruction budget is exhausted.  */
53297 add_to_dispatch_window (rtx_insn *insn)
53300 dispatch_windows *window_list;
53301 dispatch_windows *next_list;
53302 dispatch_windows *window0_list;
53303 enum insn_path path;
53304 enum dispatch_group insn_group;
/* Unrecognized insns are not tracked.  */
53312 if (INSN_CODE (insn) < 0)
53315 byte_len = min_insn_size (insn);
53316 window_list = dispatch_window_list;
53317 next_list = window_list->next;
53318 path = get_insn_path (insn);
53319 insn_group = get_insn_group (insn);
53321 /* Get the last dispatch window. */
53323 window_list = dispatch_window_list->next;
/* Micro-op count: 1 for single path, 2 for double, otherwise derived
   from the path value itself.  */
53325 if (path == path_single)
53327 else if (path == path_double)
53330 insn_num_uops = (int) path;
53332 /* If current window is full, get a new window.
53333 Window number zero is full, if MAX_INSN uops are scheduled in it.
53334 Window number one is full, if window zero's bytes plus window
53335 one's bytes is 32, or if the bytes of the new instruction added
53336 to the total makes it greater than 48, or it has already MAX_INSN
53337 instructions in it. */
53338 num_insn = window_list->num_insn;
53339 num_uops = window_list->num_uops;
53340 window_num = window_list->window_num;
53341 insn_fits = fits_dispatch_window (insn);
53343 if (num_insn >= MAX_INSN
53344 || num_uops + insn_num_uops > MAX_INSN
/* Toggle between window 0 and window 1.  */
53347 window_num = ~window_num & 1;
53348 window_list = allocate_next_window (window_num);
53351 if (window_num == 0)
53353 add_insn_window (insn, window_list, insn_num_uops);
53354 if (window_list->num_insn >= MAX_INSN
53355 && insn_group == disp_branch)
53357 process_end_window ();
53361 else if (window_num == 1)
53363 window0_list = window_list->prev;
53364 sum = window0_list->window_size + window_list->window_size;
53366 || (byte_len + sum) >= 48)
53368 process_end_window ();
53369 window_list = dispatch_window_list;
53372 add_insn_window (insn, window_list, insn_num_uops);
53375 gcc_unreachable ();
53377 if (is_end_basic_block (insn_group))
53379 /* End of basic block is reached do end-basic-block process. */
53380 process_end_window ();
53385 /* Print the dispatch window, WINDOW_NUM, to FILE. */
53387 DEBUG_FUNCTION static void
53388 debug_dispatch_window_file (FILE *file, int window_num)
/* NOTE(review): braces and the `int i;` declaration appear to be elided
   from this chunk (original line numbers jump); code kept verbatim.  */
53390 dispatch_windows *list;
/* Window 0 and window 1 live in two separate global lists.  */
53393 if (window_num == 0)
53394 list = dispatch_window_list;
53396 list = dispatch_window_list1;
53398 fprintf (file, "Window #%d:\n", list->window_num);
53399 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
53400 list->num_insn, list->num_uops, list->window_size);
53401 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
53402 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
53404 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
53406 fprintf (file, " insn info:\n");
/* Dump per-slot info; empty slots (null insn) are skipped.  */
53408 for (i = 0; i < MAX_INSN; i++)
53410 if (!list->window[i].insn)
53412 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
53413 i, group_name[list->window[i].group],
53414 i, (void *)list->window[i].insn,
53415 i, list->window[i].path,
53416 i, list->window[i].byte_len,
53417 i, list->window[i].imm_bytes);
53421 /* Print to stdout a dispatch window. */
53423 DEBUG_FUNCTION void
53424 debug_dispatch_window (int window_num)
/* Debugger-callable convenience wrapper around the FILE* variant.  */
53426 debug_dispatch_window_file (stdout, window_num);
53429 /* Print INSN dispatch information to FILE. */
53431 DEBUG_FUNCTION static void
53432 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
/* NOTE(review): declarations of byte_len/imm_size and the early-return
   body for unrecognized insns are elided from this chunk.  */
53435 enum insn_path path;
53436 enum dispatch_group group;
53438 int num_imm_operand;
53439 int num_imm32_operand;
53440 int num_imm64_operand;
53442 if (INSN_CODE (insn) < 0)
53445 byte_len = min_insn_size (insn);
53446 path = get_insn_path (insn);
53447 group = get_insn_group (insn);
53448 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
53449 &num_imm64_operand);
53451 fprintf (file, " insn info:\n");
53452 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
53453 group_name[group], path, byte_len);
53454 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
53455 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
53458 /* Print to STDERR the status of the ready list with respect to
53459 dispatch windows. */
/* NOTE(review): the comment above says STDERR but the code below prints
   to stdout — one of the two should be corrected.  */
53461 DEBUG_FUNCTION void
53462 debug_ready_dispatch (void)
53465 int no_ready = number_in_ready ();
53467 fprintf (stdout, "Number of ready: %d\n", no_ready);
/* Dump dispatch info for every insn currently on the ready list.  */
53469 for (i = 0; i < no_ready; i++)
53470 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
53473 /* This routine is the driver of the dispatch scheduler. */
/* Dispatches on MODE: DISPATCH_INIT resets scheduler state,
   ADD_TO_DISPATCH_WINDOW queues INSN into the current window.  */
53476 do_dispatch (rtx_insn *insn, int mode)
53478 if (mode == DISPATCH_INIT)
53479 init_dispatch_sched ();
53480 else if (mode == ADD_TO_DISPATCH_WINDOW)
53481 add_to_dispatch_window (insn);
53484 /* Return TRUE if Dispatch Scheduling is supported. */
/* NOTE(review): the `switch (action)` head and several case bodies are
   elided from this chunk.  Dispatch scheduling is only active on the
   AMD Bulldozer family and Zen when -mdispatch-scheduler is given.  */
53487 has_dispatch (rtx_insn *insn, int action)
53489 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3
53490 || TARGET_BDVER4 || TARGET_ZNVER1) && flag_dispatch_scheduler)
53496 case IS_DISPATCH_ON:
53501 return is_cmp (insn);
53503 case DISPATCH_VIOLATION:
53504 return dispatch_violation ();
53506 case FITS_DISPATCH_WINDOW:
53507 return fits_dispatch_window (insn);
53513 /* Implementation of reassociation_width target hook used by
53514 reassoc phase to identify parallelism level in reassociated
53515 tree. Statements tree_code is passed in OPC. Arguments type
53518 Currently parallel reassociation is enabled for Atom
53519 processors only and we set reassociation width to be 2
53520 because Atom may issue up to 2 instructions per cycle.
53522 Return value should be fixed if parallel reassociation is
53523 enabled for other processors. */
53526 ix86_reassociation_width (unsigned int, machine_mode mode)
/* NOTE(review): the scalar-mode return values for the vector branch are
   elided from this chunk.  */
53529 if (VECTOR_MODE_P (mode))
53531 if (TARGET_VECTOR_PARALLEL_EXECUTION)
53538 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
53540 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
/* 64-bit Haswell gets width 4 for FP reassociation, others 2.  */
53541 return ((TARGET_64BIT && ix86_tune == PROCESSOR_HASWELL)? 4 : 2);
53546 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
53547 place emms and femms instructions. */
53549 static machine_mode
53550 ix86_preferred_simd_mode (machine_mode mode)
/* NOTE(review): the `switch (mode)` head and its case labels
   (QI/HI/SI/DI/SF/DF) are elided from this chunk; each visible return
   picks the widest vector mode the enabled ISA allows, falling back to
   128-bit when -mprefer-avx128 is in effect.  */
53558 return TARGET_AVX512BW ? V64QImode :
53559 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
53561 return TARGET_AVX512BW ? V32HImode :
53562 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
53564 return TARGET_AVX512F ? V16SImode :
53565 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
53567 return TARGET_AVX512F ? V8DImode :
53568 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
53571 if (TARGET_AVX512F)
53573 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
/* DFmode vectorization can be disabled entirely by the tuning flag.  */
53579 if (!TARGET_VECTORIZE_DOUBLE)
53581 else if (TARGET_AVX512F)
53583 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
53585 else if (TARGET_SSE2)
53594 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
53595 vectors. If AVX512F is enabled then try vectorizing with 512bit,
53596 256bit and 128bit vectors. */
53598 static unsigned int
53599 ix86_autovectorize_vector_sizes (void)
/* Returns a bitmask of candidate vector sizes in bytes; 0 means
   "only the preferred mode's size".  */
53601 return TARGET_AVX512F ? 64 | 32 | 16 :
53602 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
53605 /* Implementation of targetm.vectorize.get_mask_mode. */
53607 static machine_mode
53608 ix86_get_mask_mode (unsigned nunits, unsigned vector_size)
53610 unsigned elem_size = vector_size / nunits;
53612 /* Scalar mask case. */
53613 if ((TARGET_AVX512F && vector_size == 64)
53614 || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
/* AVX512BW adds byte/word mask support; without it only 4- and 8-byte
   elements can use scalar (k-register) masks.  */
53616 if (elem_size == 4 || elem_size == 8 || TARGET_AVX512BW)
53617 return smallest_mode_for_size (nunits, MODE_INT)
54630 /* Return class of registers which could be used for pseudo of MODE
54631 and of class RCLASS for spilling instead of memory. Return NO_REGS
54632 if it is not possible or non-profitable. */
54634 ix86_spill_class (reg_class_t rclass, machine_mode mode)
/* SSE registers may hold spilled SImode/DImode integers when the tuning
   flag allows it; MMX is excluded because of emms bookkeeping.  */
54636 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
54637 && (mode == SImode || (TARGET_64BIT && mode == DImode))
54638 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
54639 return ALL_SSE_REGS;
53643 /* Implement targetm.vectorize.init_cost. */
53646 ix86_init_cost (struct loop *)
/* Allocate one accumulator per vect_cost_model_location
   (prologue / body / epilogue), all zeroed.  NOTE(review): the
   `return cost;` line appears to be elided from this chunk.  */
53648 unsigned *cost = XNEWVEC (unsigned, 3);
53649 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
53653 /* Implement targetm.vectorize.add_stmt_cost. */
53656 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
53657 struct _stmt_vec_info *stmt_info, int misalign,
53658 enum vect_cost_model_location where)
53660 unsigned *cost = (unsigned *) data;
53661 unsigned retval = 0;
53663 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
53664 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
53666 /* Statements in an inner loop relative to the loop being
53667 vectorized are weighted more heavily. The value here is
53668 arbitrary and could potentially be improved with analysis. */
53669 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
53670 count *= 50; /* FIXME. */
53672 retval = (unsigned) (count * stmt_cost);
53674 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
53675 for Silvermont as it has out of order integer pipeline and can execute
53676 2 scalar instruction per tick, but has in order SIMD pipeline. */
53677 if (TARGET_SILVERMONT || TARGET_INTEL)
53678 if (stmt_info && stmt_info->stmt)
53680 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
/* Only integer-typed statements get the 1.7x Silvermont penalty.  */
53681 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
53682 retval = (retval * 17) / 10;
/* Accumulate into the per-location bucket allocated by ix86_init_cost.  */
53685 cost[where] += retval;
53690 /* Implement targetm.vectorize.finish_cost. */
/* Copy the three accumulated cost buckets back to the caller.  */
53693 ix86_finish_cost (void *data, unsigned *prologue_cost,
53694 unsigned *body_cost, unsigned *epilogue_cost)
53696 unsigned *cost = (unsigned *) data;
53697 *prologue_cost = cost[vect_prologue];
53698 *body_cost = cost[vect_body];
53699 *epilogue_cost = cost[vect_epilogue];
53702 /* Implement targetm.vectorize.destroy_cost_data. */
/* NOTE(review): the body (presumably freeing DATA) is elided from
   this chunk.  */
53705 ix86_destroy_cost_data (void *data)
53710 /* Validate target specific memory model bits in VAL. */
53712 static unsigned HOST_WIDE_INT
53713 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
53715 enum memmodel model = memmodel_from_int (val)
/* Reject unknown bits, or both HLE_ACQUIRE and HLE_RELEASE at once;
   fall back to plain sequential consistency with a warning.  */
53718 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
53720 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
53722 warning (OPT_Winvalid_memory_model,
53723 "Unknown architecture specific memory model");
53724 return MEMMODEL_SEQ_CST;
/* HLE prefixes require at least acquire/release-strength orderings.  */
53726 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
53727 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
53729 warning (OPT_Winvalid_memory_model,
53730 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
53731 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
53733 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
53735 warning (OPT_Winvalid_memory_model,
53736 "HLE_RELEASE not used with RELEASE or stronger memory model");
53737 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
53742 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
53743 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
53744 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
53745 or number of vecsize_mangle variants that should be emitted. */
53748 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
53749 struct cgraph_simd_clone *clonei,
53750 tree base_type, int num)
/* An explicit simdlen must be a power of two in [2, 16].  */
53754 if (clonei->simdlen
53755 && (clonei->simdlen < 2
53756 || clonei->simdlen > 16
53757 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
53759 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
53760 "unsupported simdlen %d", clonei->simdlen);
/* NOTE(review): the switch case labels (accepted machine modes) are
   elided from this chunk; only the warning fallthrough is visible.  */
53764 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
53765 if (TREE_CODE (ret_type) != VOID_TYPE)
53766 switch (TYPE_MODE (ret_type))
53778 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
53779 "unsupported return type %qT for simd\n", ret_type);
/* Validate every argument type the same way as the return type.  */
53786 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
53787 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
53788 switch (TYPE_MODE (TREE_TYPE (t)))
53800 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
53801 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
53805 if (clonei->cilk_elemental)
53807 /* Parse here processor clause. If not present, default to 'b'. */
53808 clonei->vecsize_mangle = 'b';
53810 else if (!TREE_PUBLIC (node->decl))
53812 /* If the function isn't exported, we can pick up just one ISA
/* 'b' = SSE2, 'c' = AVX, 'd' = AVX2 mangling letters.  */
53815 clonei->vecsize_mangle = 'd';
53816 else if (TARGET_AVX)
53817 clonei->vecsize_mangle = 'c';
53819 clonei->vecsize_mangle = 'b';
/* Exported functions emit all three ISA variants, one per NUM.  */
53824 clonei->vecsize_mangle = "bcd"[num];
53827 switch (clonei->vecsize_mangle)
53830 clonei->vecsize_int = 128;
53831 clonei->vecsize_float = 128;
53834 clonei->vecsize_int = 128;
53835 clonei->vecsize_float = 256;
53838 clonei->vecsize_int = 256;
53839 clonei->vecsize_float = 256;
/* Derive simdlen from the vector width and BASE_TYPE's bit size,
   capped at 16 lanes.  */
53842 if (clonei->simdlen == 0)
53844 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
53845 clonei->simdlen = clonei->vecsize_int;
53847 clonei->simdlen = clonei->vecsize_float;
53848 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
53849 if (clonei->simdlen > 16)
53850 clonei->simdlen = 16;
53855 /* Add target attribute to SIMD clone NODE if needed. */
53858 ix86_simd_clone_adjust (struct cgraph_node *node)
53860 const char *str = NULL;
53861 gcc_assert (node->decl == cfun->decl);
/* NOTE(review): the case labels assigning STR (per mangle letter) are
   elided from this chunk.  */
53862 switch (node->simdclone->vecsize_mangle)
53877 gcc_unreachable ();
/* Attach the ISA string as a target attribute and re-activate the
   function so the new options take effect.  */
53882 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
53883 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
53886 ix86_reset_previous_fndecl ();
53887 ix86_set_current_function (node->decl);
53890 /* If SIMD clone NODE can't be used in a vectorized loop
53891 in current function, return -1, otherwise return a badness of using it
53892 (0 if it is most desirable from vecsize_mangle point of view, 1
53893 slightly less desirable, etc.). */
53896 ix86_simd_clone_usable (struct cgraph_node *node)
/* NOTE(review): the case labels and the -1 (unusable) returns are
   elided from this chunk; lower return value = better ISA match.  */
53898 switch (node->simdclone->vecsize_mangle)
53905 return TARGET_AVX2 ? 2 : 1;
53909 return TARGET_AVX2 ? 1 : 0;
53916 gcc_unreachable ();
53920 /* This function adjusts the unroll factor based on
53921 the hardware capabilities. For ex, bdver3 has
53922 a loop buffer which makes unrolling of smaller
53923 loops less important. This function decides the
53924 unroll factor using number of memory references
53925 (value 32 is used) as a heuristic. */
53928 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
/* NOTE(review): declarations of bbs/insn/i and the code incrementing
   mem_count are elided from this chunk.  */
53933 unsigned mem_count = 0;
/* When the tuning flag is off, leave NUNROLL untouched (the return is
   outside this view).  */
53935 if (!TARGET_ADJUST_UNROLL)
53938 /* Count the number of memory references within the loop body.
53939 This value determines the unrolling factor for bdver3 and bdver4
53941 subrtx_iterator::array_type array;
53942 bbs = get_loop_body (loop);
53943 for (i = 0; i < loop->num_nodes; i++)
53944 FOR_BB_INSNS (bbs[i], insn)
53945 if (NONDEBUG_INSN_P (insn))
53946 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
53947 if (const_rtx x = *iter)
53950 machine_mode mode = GET_MODE (x);
53951 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
/* Heuristic: scale the unroll factor down so roughly 32 memory
   references fit in the unrolled body.  */
53959 if (mem_count && mem_count <=32)
53960 return 32/mem_count;
53966 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
53969 ix86_float_exceptions_rounding_supported_p (void)
53971 /* For x87 floating point with standard excess precision handling,
53972 there is no adddf3 pattern (since x87 floating point only has
53973 XFmode operations) so the default hook implementation gets this
/* Either the x87 FPU or SSE math suffices for FP exception and
   rounding-mode support.  */
53975 return TARGET_80387 || TARGET_SSE_MATH;
53978 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
/* Builds three tree fragments for C11 atomic compound assignment on
   floating types: *HOLD saves and masks the FP environment, *CLEAR
   clears sticky exception flags between retries, *UPDATE restores the
   environment and re-raises any exceptions that occurred.  Handles the
   x87 (fnstenv/fnclex/fnstsw/fldenv) and SSE (stmxcsr/ldmxcsr) units
   independently; either may be absent.  NOTE(review): several lines
   (braces, an early return, parts of some build2 calls) are elided
   from this chunk.  */
53981 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
53983 if (!TARGET_80387 && !TARGET_SSE_MATH)
53985 tree exceptions_var = create_tmp_var_raw (integer_type_node);
/* x87 path: fnstenv writes a 28-byte environment, modeled here as an
   array of 7 unsigned ints.  */
53988 tree fenv_index_type = build_index_type (size_int (6));
53989 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
53990 tree fenv_var = create_tmp_var_raw (fenv_type);
53991 TREE_ADDRESSABLE (fenv_var) = 1;
53992 tree fenv_ptr = build_pointer_type (fenv_type);
53993 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
53994 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
53995 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
53996 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
53997 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
53998 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
53999 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
54000 tree hold_fnclex = build_call_expr (fnclex, 0);
54001 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
54002 NULL_TREE, NULL_TREE);
54003 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
54005 *clear = build_call_expr (fnclex, 0);
/* fnstsw yields the x87 status word; its low bits hold the sticky
   exception flags that feed EXCEPTIONS_VAR.  */
54006 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
54007 tree fnstsw_call = build_call_expr (fnstsw, 0);
54008 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
54009 sw_var, fnstsw_call);
54010 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
54011 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
54012 exceptions_var, exceptions_x87);
54013 *update = build2 (COMPOUND_EXPR, integer_type_node,
54014 sw_mod, update_mod);
54015 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
54016 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
54018 if (TARGET_SSE_MATH)
/* SSE path: save MXCSR, then load a copy with all exception mask bits
   set (0x1f80) and sticky flags cleared (& ~0x3f).  TODO(review):
   confirm those bit meanings against the SDM MXCSR layout.  */
54020 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
54021 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
54022 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
54023 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
54024 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
54025 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
54026 mxcsr_orig_var, stmxcsr_hold_call);
54027 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
54029 build_int_cst (unsigned_type_node, 0x1f80));
54030 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
54031 build_int_cst (unsigned_type_node, 0xffffffc0));
54032 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
54033 mxcsr_mod_var, hold_mod_val);
54034 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
54035 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
54036 hold_assign_orig, hold_assign_mod);
54037 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
54038 ldmxcsr_hold_call);
54040 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
54043 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
/* Chain onto any x87 *CLEAR, or stand alone when x87 is absent.  */
54045 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
54046 ldmxcsr_clear_call);
54048 *clear = ldmxcsr_clear_call;
54049 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
54050 tree exceptions_sse = fold_convert (integer_type_node,
54051 stxmcsr_update_call);
/* Merge SSE exception flags with the x87 ones already in
   EXCEPTIONS_VAR, or assign directly when x87 is absent.  */
54054 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
54055 exceptions_var, exceptions_sse);
54056 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
54057 exceptions_var, exceptions_mod);
54058 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
54059 exceptions_assign);
54062 *update = build2 (MODIFY_EXPR, integer_type_node,
54063 exceptions_var, exceptions_sse);
54064 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
54065 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
54066 ldmxcsr_update_call);
/* Finally re-raise the collected exceptions atomically.  */
54068 tree atomic_feraiseexcept
54069 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
54070 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
54071 1, exceptions_var);
54072 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
54073 atomic_feraiseexcept_call);
54076 /* Return mode to be used for bounds or VOIDmode
54077 if bounds are not supported. */
54079 static enum machine_mode
54080 ix86_mpx_bound_mode ()
54082 /* Do not support pointer checker if MPX
/* NOTE(review): the condition guarding this warning and the BNDmode /
   VOIDmode returns are elided from this chunk.  */
54086 if (flag_check_pointer_bounds)
54087 warning (0, "Pointer Checker requires MPX support on this target."
54088 " Use -mmpx options to enable MPX.");
54095 /* Return constant used to statically initialize constant bounds.
54097 This function is used to create special bound values. For now
54098 only INIT bounds and NONE bounds are expected. More special
54099 values may be added later. */
54102 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
/* Bounds are encoded as a complex of two pointer-sized ints; each half
   is all-zeros or all-ones depending on LB/UB.  */
54104 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
54105 : build_zero_cst (pointer_sized_int_node);
54106 tree high = ub ? build_zero_cst (pointer_sized_int_node)
54107 : build_minus_one_cst (pointer_sized_int_node);
54109 /* This function is supposed to be used to create INIT and
54110 NONE bounds only. */
54111 gcc_assert ((lb == 0 && ub == -1)
54112 || (lb == -1 && ub == 0));
54114 return build_complex (NULL, low, high);
54117 /* Generate a list of statements STMTS to initialize pointer bounds
54118 variable VAR with bounds LB and UB. Return the number of generated
54122 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
54124 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
54125 tree lhs, modify, var_p;
/* The upper bound is stored in one's-complement form.  */
54127 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
54128 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
/* First half of VAR <- LB.  */
54130 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
54131 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
54132 append_to_statement_list (modify, stmts);
/* Second half (one pointer-size further) <- ~UB.  */
54134 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
54135 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
54136 TYPE_SIZE_UNIT (pointer_sized_int_node)));
54137 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
54138 append_to_statement_list (modify, stmts);
54143 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
54144 /* For i386, common symbol is local only for non-PIE binaries. For
54145 x86-64, common symbol is local only for non-PIE binaries or linker
54146 supports copy reloc in PIE binaries. */
54149 ix86_binds_local_p (const_tree exp)
/* NOTE(review): part of the final argument expression is elided from
   this chunk; it folds in the HAVE_LD_PIE_COPYRELOC linker feature.  */
54151 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
54154 && HAVE_LD_PIE_COPYRELOC != 0)));
54158 /* If MEM is in the form of [base+offset], extract the two parts
54159 of address and set to BASE and OFFSET, otherwise return false. */
54162 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
54166 gcc_assert (MEM_P (mem));
54168 addr = XEXP (mem, 0);
/* Strip a wrapping CONST so (const (plus sym off)) is handled too.  */
54170 if (GET_CODE (addr) == CONST)
54171 addr = XEXP (addr, 0);
/* Bare register or symbol: offset is zero.  */
54173 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
54176 *offset = const0_rtx;
/* (plus (reg|symbol) (const_int)): split into the two parts.  */
54180 if (GET_CODE (addr) == PLUS
54181 && (REG_P (XEXP (addr, 0))
54182 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
54183 && CONST_INT_P (XEXP (addr, 1)))
54185 *base = XEXP (addr, 0);
54186 *offset = XEXP (addr, 1);
54193 /* Given OPERANDS of consecutive load/store, check if we can merge
54194 them into move multiple. LOAD is true if they are load instructions.
54195 MODE is the mode of memory operands. */
54198 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
54199 enum machine_mode mode)
54201 HOST_WIDE_INT offval_1, offval_2, msize;
54202 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
/* For loads the mems are operands 1/3, for stores operands 0/2.  */
54206 mem_1 = operands[1];
54207 mem_2 = operands[3];
54208 reg_1 = operands[0];
54209 reg_2 = operands[2];
54213 mem_1 = operands[0];
54214 mem_2 = operands[2];
54215 reg_1 = operands[1];
54216 reg_2 = operands[3];
54219 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
/* NOTE(review): both accesses must use the same register — presumably
   a push/pop-style pairing; confirm against the insn patterns using
   this predicate.  */
54221 if (REGNO (reg_1) != REGNO (reg_2))
54224 /* Check if the addresses are in the form of [base+offset]. */
54225 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
54227 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
54230 /* Check if the bases are the same. */
54231 if (!rtx_equal_p (base_1, base_2))
54234 offval_1 = INTVAL (offset_1);
54235 offval_2 = INTVAL (offset_2);
54236 msize = GET_MODE_SIZE (mode);
54237 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
54238 if (offval_1 + msize != offval_2)
54244 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
/* NOTE(review): the `switch (op)` head, its case labels and parts of
   the conditions are elided from this chunk; the visible returns gate
   certain optabs on SSE math, no trapping math, and speed
   optimization.  */
54247 ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
54248 optimization_type opt_type)
54262 return opt_type == OPTIMIZE_FOR_SPEED;
54265 if (SSE_FLOAT_MODE_P (mode1)
54267 && !flag_trapping_math
54269 return opt_type == OPTIMIZE_FOR_SPEED;
54275 if (SSE_FLOAT_MODE_P (mode1)
54277 && !flag_trapping_math
54280 return opt_type == OPTIMIZE_FOR_SPEED;
/* Reciprocal square root is only worthwhile when tuning allows it.  */
54283 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
54290 /* Address space support.
54292 This is not "far pointers" in the 16-bit sense, but an easy way
54293 to use %fs and %gs segment prefixes. Therefore:
54295 (a) All address spaces have the same modes,
54296 (b) All address spaces have the same addresss forms,
54297 (c) While %fs and %gs are technically subsets of the generic
54298 address space, they are probably not subsets of each other.
54299 (d) Since we have no access to the segment base register values
54300 without resorting to a system call, we cannot convert a
54301 non-default address space to a default address space.
54302 Therefore we do not claim %fs or %gs are subsets of generic.
54304 Therefore we can (mostly) use the default hooks. */
54306 /* All use of segmentation is assumed to make address 0 valid. */
/* Address 0 relative to a %fs/%gs segment base is a legitimate
   address; only the generic space treats 0 as null.  */
54309 ix86_addr_space_zero_address_valid (addr_space_t as)
54311 return as != ADDR_SPACE_GENERIC;
54313 #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
54314 #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
54316 /* Initialize the GCC target structure. */
54317 #undef TARGET_RETURN_IN_MEMORY
54318 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
54320 #undef TARGET_LEGITIMIZE_ADDRESS
54321 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
54323 #undef TARGET_ATTRIBUTE_TABLE
54324 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
54325 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
54326 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
54327 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
54328 # undef TARGET_MERGE_DECL_ATTRIBUTES
54329 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
54332 #undef TARGET_COMP_TYPE_ATTRIBUTES
54333 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
54335 #undef TARGET_INIT_BUILTINS
54336 #define TARGET_INIT_BUILTINS ix86_init_builtins
54337 #undef TARGET_BUILTIN_DECL
54338 #define TARGET_BUILTIN_DECL ix86_builtin_decl
54339 #undef TARGET_EXPAND_BUILTIN
54340 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
54342 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
54343 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
54344 ix86_builtin_vectorized_function
54346 #undef TARGET_VECTORIZE_BUILTIN_GATHER
54347 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
54349 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
54350 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
54352 #undef TARGET_BUILTIN_RECIPROCAL
54353 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
54355 #undef TARGET_ASM_FUNCTION_EPILOGUE
54356 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
54358 #undef TARGET_ENCODE_SECTION_INFO
54359 #ifndef SUBTARGET_ENCODE_SECTION_INFO
54360 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
54362 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
54365 #undef TARGET_ASM_OPEN_PAREN
54366 #define TARGET_ASM_OPEN_PAREN ""
54367 #undef TARGET_ASM_CLOSE_PAREN
54368 #define TARGET_ASM_CLOSE_PAREN ""
54370 #undef TARGET_ASM_BYTE_OP
54371 #define TARGET_ASM_BYTE_OP ASM_BYTE
54373 #undef TARGET_ASM_ALIGNED_HI_OP
54374 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
54375 #undef TARGET_ASM_ALIGNED_SI_OP
54376 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
54378 #undef TARGET_ASM_ALIGNED_DI_OP
54379 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
54382 #undef TARGET_PROFILE_BEFORE_PROLOGUE
54383 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
54385 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
54386 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
54388 #undef TARGET_ASM_UNALIGNED_HI_OP
54389 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
54390 #undef TARGET_ASM_UNALIGNED_SI_OP
54391 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
54392 #undef TARGET_ASM_UNALIGNED_DI_OP
54393 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
54395 #undef TARGET_PRINT_OPERAND
54396 #define TARGET_PRINT_OPERAND ix86_print_operand
54397 #undef TARGET_PRINT_OPERAND_ADDRESS
54398 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
54399 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
54400 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
54401 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
54402 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
54404 #undef TARGET_SCHED_INIT_GLOBAL
54405 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
54406 #undef TARGET_SCHED_ADJUST_COST
54407 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
54408 #undef TARGET_SCHED_ISSUE_RATE
54409 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
54410 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
54411 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
54412 ia32_multipass_dfa_lookahead
54413 #undef TARGET_SCHED_MACRO_FUSION_P
54414 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
54415 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
54416 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
54418 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
54419 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
54421 #undef TARGET_MEMMODEL_CHECK
54422 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
54424 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
54425 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
54428 #undef TARGET_HAVE_TLS
54429 #define TARGET_HAVE_TLS true
54431 #undef TARGET_CANNOT_FORCE_CONST_MEM
54432 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
54433 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
54434 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
54436 #undef TARGET_DELEGITIMIZE_ADDRESS
54437 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
54439 #undef TARGET_MS_BITFIELD_LAYOUT_P
54440 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
54443 #undef TARGET_BINDS_LOCAL_P
54444 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
54446 #undef TARGET_BINDS_LOCAL_P
54447 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
54449 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
54450 #undef TARGET_BINDS_LOCAL_P
54451 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
54454 #undef TARGET_ASM_OUTPUT_MI_THUNK
54455 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
54456 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
54457 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
54459 #undef TARGET_ASM_FILE_START
54460 #define TARGET_ASM_FILE_START x86_file_start
54462 #undef TARGET_OPTION_OVERRIDE
54463 #define TARGET_OPTION_OVERRIDE ix86_option_override
54465 #undef TARGET_REGISTER_MOVE_COST
54466 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
54467 #undef TARGET_MEMORY_MOVE_COST
54468 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
54469 #undef TARGET_RTX_COSTS
54470 #define TARGET_RTX_COSTS ix86_rtx_costs
54471 #undef TARGET_ADDRESS_COST
54472 #define TARGET_ADDRESS_COST ix86_address_cost
54474 #undef TARGET_FIXED_CONDITION_CODE_REGS
54475 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
54476 #undef TARGET_CC_MODES_COMPATIBLE
54477 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
54479 #undef TARGET_MACHINE_DEPENDENT_REORG
54480 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
54482 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
54483 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
54485 #undef TARGET_BUILD_BUILTIN_VA_LIST
54486 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
54488 #undef TARGET_FOLD_BUILTIN
54489 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
54491 #undef TARGET_COMPARE_VERSION_PRIORITY
54492 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
54494 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
54495 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
54496 ix86_generate_version_dispatcher_body
54498 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
54499 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
54500 ix86_get_function_versions_dispatcher
54502 #undef TARGET_ENUM_VA_LIST_P
54503 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
54505 #undef TARGET_FN_ABI_VA_LIST
54506 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
54508 #undef TARGET_CANONICAL_VA_LIST_TYPE
54509 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
54511 #undef TARGET_EXPAND_BUILTIN_VA_START
54512 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
54514 #undef TARGET_MD_ASM_ADJUST
54515 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
/* Hooks describing the calling convention: prototype promotion,
   varargs setup, argument passing and alignment, the argument pointer,
   stack realignment (DRAP), static chain, trampolines, and callee
   stack popping.  */
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
/* PIC register setup and whether a pseudo may serve as the PIC reg.  */
#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG ix86_init_pic_reg
#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
/* Dynamic stack-realignment support.  */
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
/* Miscellaneous hooks: combine validation, ASan shadow offset, va_arg
   gimplification, supported scalar/vector/libgcc FP modes, literal
   suffixes, DWARF TLS relocations, mangling, stack protector, function
   return values, and reload/register-class queries.  */
#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  ix86_libgcc_floating_mode_supported_p
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
/* Let a subtarget add its own attributes if it defines the macro.
   NOTE(review): the matching #endif for this #ifdef is not visible in
   this excerpt (the source numbering skips here) — confirm it is
   present in the full file.  */
#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
/* Function return values.  */
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
/* Reload / register allocation class queries.  */
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload
#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
/* Auto-vectorizer hooks: cost model, constant-permutation support,
   preferred SIMD modes/sizes, and mask-mode selection.  */
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
/* Per-loop vectorization cost-model bookkeeping (init/add/finish/destroy).  */
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
/* Per-function target-option handling ("target" attribute save/restore/
   print, inlining compatibility), address and constant legitimacy, LRA
   usage, frame-pointer elimination, and epilogue/entry bookkeeping.  */
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save
#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore
#undef TARGET_OPTION_POST_STREAM_IN
#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print
#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
/* i386 always uses LRA rather than old reload.  */
#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true
#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority
#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate
#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
/* Darwin builtin renaming.  NOTE(review): darwin_rename_builtins is
   Mach-O specific and the source numbering skips just before this
   entry — a conditional guard (e.g. #if TARGET_MACHO) is likely elided
   from this excerpt; confirm against the full file.  */
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class
/* OpenMP/Cilk "declare simd" clone support.  */
#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen
#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
  ix86_simd_clone_adjust
#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
  ix86_simd_clone_usable
#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p
/* Mode-switching (optimize_mode_switching) hooks.  */
#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set
#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed
#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after
#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry
#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit
#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority
/* Pointer Bounds Checker (tree-chkp, backed here by Intel MPX — the
   ix86_mpx_* names) hooks: passing, returning, and materializing
   pointer bounds.  */
#undef TARGET_LOAD_BOUNDS_FOR_ARG
#define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
#undef TARGET_STORE_BOUNDS_FOR_ARG
#define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
#undef TARGET_LOAD_RETURNED_BOUNDS
#define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
#undef TARGET_STORE_RETURNED_BOUNDS
#define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
#undef TARGET_CHKP_BOUND_MODE
#define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
#undef TARGET_BUILTIN_CHKP_FUNCTION
#define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
#undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
#define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
#undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
#define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
#undef TARGET_CHKP_INITIALIZE_BOUNDS
#define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
#undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
#define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
/* Final hooks: offload-target option passing, the largest alignment the
   target ever requires (512 bits, i.e. a ZMM register), and optab
   support queries.  */
#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS \
  ix86_offload_options
#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
/* Instantiate the target vtable.  TARGET_INITIALIZER (from
   target-def.h, included last — see top of file) expands to an
   initializer that picks up every TARGET_* macro redefined above.  */
struct gcc_target targetm = TARGET_INITIALIZER;
/* Garbage-collector roots generated by gengtype for this file.  */
#include "gt-i386.h"